Rush has submitted this change and it was merged.
Change subject: Add python version of maintain-replicas script
......................................................................
Add python version of maintain-replicas script
This does not include functionality that was split into maintain-meta_p.py last
year.
This (so far) deals with the following issues:
* It was written in Perl.
* Configuration was part of the script.
* It tried to parse ~/.my.cnf instead of getting that data from it's own
configuration.
* Configuration file couldn't be specified on the CLI
* It is now commented more.
* It allows specifying one or multiple databases to work on, instead of just
running across all.dblist.
* It does much less work done in file scope.
* It has a "dry run" mode.
* It is more verbose about what it's not doing.
* It waits for confirmation before replacing existing views, by default.
* It clones mediawiki-config for you if it doesn't already exist.
Bug: T138450
Change-Id: Icc51fc9a23f2fcf4c19c7adc092662d7122471e1
---
A maintain-replicas/config.json
A maintain-replicas/maintain-replicas.py
M tox.ini
3 files changed, 530 insertions(+), 1 deletion(-)
Approvals:
Rush: Looks good to me, approved
jenkins-bot: Verified
diff --git a/maintain-replicas/config.json b/maintain-replicas/config.json
new file mode 100644
index 0000000..6d0ec78
--- /dev/null
+++ b/maintain-replicas/config.json
@@ -0,0 +1,156 @@
+{
+ "slices": [
+ ["labsdb1001.eqiad.wmnet", 3306],
+ ["labsdb1002.eqiad.wmnet", 3306],
+ ["labsdb1003.eqiad.wmnet", 3306]
+ ],
+ "mysql_user": "root",
+ "mysql_password": "password",
+ "fullviews": [
+ "abuse_filter_action", "abuse_filter_history",
+ "aft_article_answer", "aft_article_answer_text",
+ "aft_article_feedback_properties",
"aft_article_feedback_ratings_rollup",
+ "aft_article_feedback_select_rollup", "aft_article_field",
"aft_article_field_group",
+ "aft_article_field_option", "aft_article_filter_count",
"aft_article_revision_feedback_ratings_rollup",
+ "aft_article_revision_feedback_select_rollup", "article_assessment",
"article_assessment_pages",
+ "article_assessment_ratings", "article_feedback",
"article_feedback_pages",
+ "article_feedback_properties", "article_feedback_ratings",
"article_feedback_revisions",
+ "article_feedback_stats", "article_feedback_stats_types", "category",
"categorylinks", "change_tag",
+ "ep_articles", "ep_cas", "ep_courses", "ep_events", "ep_instructors",
"ep_oas", "ep_orgs",
+ "ep_revisions", "ep_students", "ep_users_per_course", "externallinks",
"flaggedimages",
+ "flaggedpage_config", "flaggedpage_pending", "flaggedpages",
"flaggedrevs", "flaggedrevs_promote",
+ "flaggedrevs_statistics", "flaggedrevs_stats", "flaggedrevs_stats2",
"flaggedrevs_tracking",
+ "flaggedtemplates", "geo_killlist", "geo_tags", "geo_updates",
"globalimagelinks", "global_block_whitelist",
+ "hashs", "hitcounter", "image", "imagelinks", "imagelinks_old",
"interwiki", "iwlinks",
+ "l10n_cache", "langlinks", "links", "localisation",
"localisation_file_hash",
+ "math", "module_deps", "msg_resource_links", "namespaces",
+ "page", "page_broken", "pagelinks", "page_props", "page_restrictions",
"pagetriage_log",
+ "pagetriage_page", "pagetriage_page_tags", "pagetriage_tags",
"pif_edits", "povwatch_log",
+ "povwatch_subscribers", "protected_titles", "pr_index", "redirect",
"renameuser_status", "site_identifiers",
+ "sites", "site_stats", "tag_summary", "templatelinks", "transcode",
"updatelog", "updates",
+ "user_former_groups", "user_groups", "valid_tag", "wikilove_image_log",
+ "wikilove_log", "global_group_permissions",
"global_group_restrictions", "global_user_groups",
+ "globalblocks", "localuser", "wikiset", "wb_changes",
"wb_changes_dispatch", "wb_changes_subscription",
+ "wb_entity_per_page", "wb_id_counters", "wb_items_per_site",
"wb_property_info", "wb_terms",
+ "wbc_entity_usage", "wbs_propertypairs", "watchlist_count"
+ ],
+ "logging_whitelist": [
+ "gblrights", "globalauth", "close", "lock", "gblblock", "managetags",
"gather", "campus",
+ "institution", "instructor", "online", "merge", "massmessage",
"course", "stable", "import",
+ "abusefilter", "student", "eparticle", "rights",
"pagetriage-deletion", "protect",
+ "pagetriage-curation", "thanks", "renameuser", "review", "block",
"upload", "move",
+ "delete", "patrol", "newusers"
+ ],
+ "customviews": {
+ "abuse_filter": {
+ "source": "abuse_filter",
+ "view": "select af_id, if(af_hidden,null,af_pattern) as
af_pattern, af_user, af_user_text, af_timestamp, af_enabled,
if(af_hidden,null,af_comments) as af_comments, af_public_comments, af_hidden,
af_hit_count, af_throttled, af_deleted, af_actions, af_global, af_group"
+ },
+ "abuse_filter_log": {
+ "source": "abuse_filter_log",
+ "view": "select afl_id, afl_filter, afl_user, afl_user_text, NULL
as afl_ip, afl_action, afl_actions, afl_var_dump, afl_timestamp, afl_namespace,
afl_title, afl_wiki, afl_deleted, afl_patrolled_by, afl_rev_id, afl_log_id"
+ },
+ "aft_article_feedback": {
+ "source": "aft_article_feedback",
+ "view": "select af_id, af_page_id, af_user_id, NULL as af_user_ip,
af_user_anon_token, af_revision_id, af_cta_id, af_link_id, af_created,
af_abuse_count, af_helpful_count, af_unhelpful_count, af_oversight_count,
af_is_deleted, af_is_hidden, af_net_helpfulness, af_has_comment,
af_is_unhidden, af_is_undeleted, af_is_declined, af_activity_count, af_form_id,
af_experiment, af_suppress_count, af_last_status, af_last_status_user_id,
af_last_status_timestamp, af_is_autohide, af_is_unrequested, af_is_featured,
af_is_unfeatured, af_is_resolved, af_is_unresolved, af_relevance_score,
af_relevance_sort, af_last_status_notes"
+ },
+ "archive": {
+ "source": "archive",
+ "view": "select ar_id, ar_namespace, ar_title, NULL as ar_text,
NULL as ar_comment, if(ar_deleted&4,null,ar_user) as ar_user,
if(ar_deleted&4,null,ar_user_text) as ar_user_text, ar_timestamp,
ar_minor_edit, ar_flags, ar_rev_id, if(ar_deleted&1,null,ar_text_id) as
ar_text_id, ar_deleted, if(ar_deleted&1,null,ar_len) as ar_len, ar_page_id,
ar_parent_id, if(ar_deleted&1,null,ar_sha1) as ar_sha1"
+ },
+ "archive_userindex": {
+ "source": "archive",
+ "view": "select ar_id, ar_namespace, ar_title, NULL as ar_text,
NULL as ar_comment, ar_user, ar_user_text, ar_timestamp, ar_minor_edit,
ar_flags, ar_rev_id, if(ar_deleted&1,null,ar_text_id) as ar_text_id,
ar_deleted, if(ar_deleted&1,null,ar_len) as ar_len, ar_page_id, ar_parent_id,
if(ar_deleted&1,null,ar_sha1) as ar_sha1",
+ "where": "(ar_deleted&4)=0"
+ },
+ "filearchive": {
+ "source": "filearchive",
+ "view": "select fa_id, fa_name, fa_archive_name, fa_storage_group,
NULL as fa_storage_key, fa_deleted_user, fa_deleted_timestamp,
fa_deleted_reason, if(fa_deleted&1,null,fa_size) as fa_size,
if(fa_deleted&1,null,fa_width) as fa_width, if(fa_deleted&1,null,fa_height) as
fa_height, if(fa_deleted&1,null,fa_metadata) as fa_metadata,
if(fa_deleted&1,null,fa_bits) as fa_bits, if(fa_deleted&1,null,fa_media_type)
as fa_media_type, if(fa_deleted&1,null,fa_major_mime) as fa_major_mime,
if(fa_deleted&1,null,fa_minor_mime) as fa_minor_mime,
if(fa_deleted&2,null,fa_description) as fa_description,
if(fa_deleted&4,null,fa_user)as fa_user, if(fa_deleted&4,null,fa_user_text) as
fa_user_text, fa_timestamp, fa_deleted, if(fa_deleted&1,null,fa_sha1) as
fa_sha1"
+ },
+ "filearchive_userindex": {
+ "source": "filearchive",
+ "view": "select fa_id, fa_name, fa_archive_name, fa_storage_group,
NULL as fa_storage_key, fa_deleted_user, fa_deleted_timestamp,
fa_deleted_reason, if(fa_deleted&1,null,fa_size) as fa_size,
if(fa_deleted&1,null,fa_width) as fa_width, if(fa_deleted&1,null,fa_height) as
fa_height, if(fa_deleted&1,null,fa_metadata) as fa_metadata,
if(fa_deleted&1,null,fa_bits) as fa_bits, if(fa_deleted&1,null,fa_media_type)
as fa_media_type, if(fa_deleted&1,null,fa_major_mime) as fa_major_mime,
if(fa_deleted&1,null,fa_minor_mime) as fa_minor_mime,
if(fa_deleted&2,null,fa_description) as fa_description, fa_user, fa_user_text,
fa_timestamp, fa_deleted, if(fa_deleted&1,null,fa_sha1) as fa_sha1",
+ "where": "(fa_deleted&4)=0"
+ },
+ "globaluser": {
+ "source": "globaluser",
+ "view": "select gu_id, gu_name, gu_enabled, gu_enabled_method,
gu_home_db, NULL as gu_email, NULL as gu_email_authenticated, NULL as gu_salt,
NULL as gu_password, gu_locked, gu_hidden, gu_registration, NULL as
gu_password_reset_key, NULL as gu_password_reset_expiration, NULL as
gu_auth_token",
+ "where": "gu_hidden=''"
+ },
+ "ipblocks": {
+ "source": "ipblocks",
+ "view": "select ipb_id, if(ipb_auto<>0,null,ipb_address) as
ipb_address, ipb_user, ipb_by, ipb_reason, ipb_timestamp, ipb_auto,
ipb_anon_only, ipb_create_account, ipb_expiry,
if(ipb_auto<>0,null,ipb_range_start) as ipb_range_start,
if(ipb_auto<>0,null,ipb_range_end) as ipb_range_end, ipb_enable_autoblock,
ipb_deleted, ipb_block_email, ipb_by_text, ipb_allow_usertalk,
ipb_parent_block_id",
+ "where": "ipb_deleted=0"
+ },
+ "ipblocks_ipindex": {
+ "source": "ipblocks",
+ "view": "select ipb_id, ipb_address, ipb_user, ipb_by, ipb_reason,
ipb_timestamp, ipb_auto, ipb_anon_only, ipb_create_account, ipb_expiry,
ipb_range_start, ipb_range_end, ipb_enable_autoblock, ipb_deleted,
ipb_block_email, ipb_by_text, ipb_allow_usertalk, ipb_parent_block_id",
+ "where": "ipb_deleted=0 and ipb_auto=0"
+ },
+ "logging": {
+ "source": "logging",
+ "view": "select log_id, log_type,
if(log_deleted&1,null,log_action) as log_action, log_timestamp,
if(log_deleted&4,null,log_user) as log_user,
if(log_deleted&1,null,log_namespace) as log_namespace,
if(log_deleted&1,null,log_title) as log_title,
if(log_deleted&2,null,log_comment) as log_comment,
if(log_deleted,null,log_params) as log_params, log_deleted,
if(log_deleted&4,null,log_user_text) as log_user_text,
if(log_deleted&1,null,log_page) as log_page",
+ "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT
TO PULL FROM THE `logging_whitelist` CONFIG"
+ },
+ "logging_logindex": {
+ "source": "logging",
+ "view": "select log_id, log_type, log_action, log_timestamp,
if(log_deleted&4,null,log_user) as log_user, log_namespace, log_title,
if(log_deleted&2,null,log_comment) as log_comment,
if(log_deleted,null,log_params) as log_params, log_deleted,
if(log_deleted&4,null,log_user_text) as log_user_text, log_page",
+ "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT
TO PULL FROM THE `logging_whitelist` CONFIG"
+ },
+ "logging_userindex": {
+ "source": "logging",
+ "view": "select log_id, log_type,
if(log_deleted&1,null,log_action) as log_action, log_timestamp, log_user,
if(log_deleted&1,null,log_namespace) as log_namespace,
if(log_deleted&1,null,log_title) as log_title,
if(log_deleted&2,null,log_comment) as log_comment,
if(log_deleted,null,log_params) as log_params, log_deleted, log_user_text as
log_user_text, if(log_deleted&1,null,log_page) as log_page",
+ "where": "ANYTHING YOU WRITE HERE WILL BE OVERRIDDEN BY THE SCRIPT
TO PULL FROM THE `logging_whitelist` CONFIG"
+ },
+ "mark_as_helpful": {
+ "source": "mark_as_helpful",
+ "view": "select mah_id, mah_type, mah_item, mah_user_id,
mah_user_editcount, mah_namespace, mah_title, mah_timestamp, NULL as
mah_system_type, NULL as mah_user_agent, NULL as mah_locale"
+ },
+ "oldimage": {
+ "source": "oldimage",
+ "view": "select oi_name, oi_archive_name, oi_size, oi_width,
oi_height, oi_bits, if(oi_deleted&2,null,oi_description) as oi_description,
if(oi_deleted&4,null,oi_user) as oi_user, if(oi_deleted&4,null,oi_user_text) as
oi_user_text, oi_timestamp, oi_metadata, oi_media_type, oi_major_mime,
oi_minor_mime, oi_deleted, oi_sha1"
+ },
+ "oldimage_userindex": {
+ "source": "oldimage",
+ "view": "select oi_name, oi_archive_name, oi_size, oi_width,
oi_height, oi_bits, if(oi_deleted&2,null,oi_description) as oi_description,
oi_user, oi_user_text, oi_timestamp, oi_metadata, oi_media_type, oi_major_mime,
oi_minor_mime, oi_deleted, oi_sha1",
+ "where": "(oi_deleted&4)=0"
+ },
+ "recentchanges": {
+ "source": "recentchanges",
+ "view": "select rc_id, rc_timestamp, if(rc_deleted&4,null,rc_user)
as rc_user, if(rc_deleted&4,null,rc_user_text) as rc_user_text, rc_namespace,
rc_title, if(rc_deleted&2,null,rc_comment) as rc_comment, rc_minor, rc_bot,
rc_new, rc_cur_id, rc_this_oldid, rc_last_oldid, rc_type, rc_source,
rc_patrolled, null as rc_ip, rc_old_len, rc_new_len, rc_deleted, rc_logid,
rc_log_type, rc_log_action, rc_params"
+ },
+ "recentchanges_userindex": {
+ "source": "recentchanges",
+ "view": "select rc_id, rc_timestamp, rc_user, rc_user_text,
rc_namespace, rc_title, if(rc_deleted&2,null,rc_comment) as rc_comment,
rc_minor, rc_bot, rc_new, rc_cur_id, rc_this_oldid, rc_last_oldid, rc_type,
rc_source, rc_patrolled, null as rc_ip, rc_old_len, rc_new_len, rc_deleted,
rc_logid, rc_log_type, rc_log_action, rc_params",
+ "where": "(rc_deleted&4)=0"
+ },
+ "revision": {
+ "source": "revision",
+ "view": "select rev_id, rev_page,
if(rev_deleted&1,null,rev_text_id) as rev_text_id,
if(rev_deleted&2,null,rev_comment) as rev_comment,
if(rev_deleted&4,null,rev_user) as rev_user,
if(rev_deleted&4,null,rev_user_text) as rev_user_text, rev_timestamp,
rev_minor_edit, rev_deleted, if(rev_deleted&1,null,rev_len) as rev_len,
rev_parent_id, if(rev_deleted&1,null,rev_sha1) as rev_sha1, rev_content_model,
rev_content_format"
+ },
+ "revision_userindex": {
+ "source": "revision",
+ "view": "select rev_id, rev_page,
if(rev_deleted&1,null,rev_text_id) as rev_text_id,
if(rev_deleted&2,null,rev_comment) as rev_comment, rev_user, rev_user_text,
rev_timestamp, rev_minor_edit, rev_deleted, if(rev_deleted&1,null,rev_len) as
rev_len, rev_parent_id, if(rev_deleted&1,null,rev_sha1) as rev_sha1,
rev_content_model, rev_content_format",
+ "where": "(rev_deleted&4)=0"
+ },
+ "user": {
+ "source": "user",
+ "view": "select user_id, user_name, user_real_name, NULL as
user_password, NULL as user_newpassword, NULL as user_email, NULL as
user_options, NULL as user_touched, NULL as user_token, NULL as
user_email_authenticated, NULL as user_email_token, NULL as
user_email_token_expires, user_registration, NULL as user_newpass_time,
user_editcount, NULL as user_password_expires"
+ },
+ "user_old": {
+ "source": "user_old",
+ "view": "select user_id, user_name, NULL as user_password, NULL as
user_newpassword, NULL as user_email, NULL as user_options, NULL as
user_newtalk, NULL as user_touched, user_real_name, NULL as user_token"
+ },
+ "user_properties": {
+ "source": "user_properties",
+ "view": "select up_user, up_property, up_value",
+ "where": "up_property in ( 'disablemail', 'fancysig', 'gender',
'language', 'nickname', 'skin', 'timecorrection', 'variant' )"
+ },
+ "user_properties_anon": {
+ "limit": 2,
+ "source": [ "user_properties", "user",
"meta_p.properties_anon_whitelist" ],
+ "view": "select cast(extract(year_month from user_touched)*100+1
as date) upa_touched, up_property, up_value",
+ "where": "user_id=up_user and up_property like pw_property"
+ }
+ }
+}
diff --git a/maintain-replicas/maintain-replicas.py
b/maintain-replicas/maintain-replicas.py
new file mode 100644
index 0000000..3774d32
--- /dev/null
+++ b/maintain-replicas/maintain-replicas.py
@@ -0,0 +1,373 @@
+#! /usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Based on work by Marc-André Pelletier, ported to Python by Alex Monk
+# Copyright © 2016 Alex Monk <[email protected]>
+# Copyright © 2015 Alex Monk <[email protected]>
+# Copyright © 2013 Marc-André Pelletier <[email protected]>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+#
+# This script maintains the databases containing sanitized views to
+# the replicated databases (in the form <db>_p for every <db>)
+#
+# By default, it processes every database but it accepts a list of
+# databases to process
+#
+# The script uses a clone of operations/mediawiki-config in a directory called
+# mediawiki-config, from which it will get it's database list information. It
+# will clone the repository from https://gerrit.wikimedia.org if necessary.
+#
+
+import argparse
+import json
+import logging
+import os
+import re
+import subprocess
+
+import pymysql
+
+
+class SchemaOperations():
+ def __init__(self, dry_run, replace_all, db, db_size, cursor):
+ self.dry_run = dry_run
+ self.replace_all = replace_all
+ self.db = db
+ self.db_p = db + '_p'
+ self.db_size = db_size
+ self.cursor = cursor
+
+ def write_execute(self, query):
+ if self.dry_run:
+ logging.info("DRY RUN: Would execute: {}".format(query))
+ else:
+ self.cursor.execute(query)
+
+ def table_exists(self, table, database):
+ """
+ Determine whether a table of the given name exists in the database of
+ the given name.
+ """
+ self.cursor.execute("""
+ SELECT `table_name`
+ FROM `information_schema`.`tables`
+ WHERE `table_name`=%s AND `table_schema`=%s;
+ """, args=(table, database))
+ return bool(self.cursor.rowcount)
+
+ def execute(self, fullviews, customviews):
+ """
+ Begin checking/creating views for this schema.
+ """
+
+ self.cursor.execute("""
+ SELECT `schema_name`
+ FROM `information_schema`.`schemata`
+ WHERE `schema_name`=%s
+ """, args=(self.db_p,))
+ if not self.cursor.rowcount:
+ # Can't use pymysql to build this
+ self.write_execute(
+ "CREATE DATABASE `{}`;".format(self.db_p)
+ )
+
+ logging.info("Full views for {}:".format(self.db))
+ for view in fullviews:
+ self.do_fullview(view)
+
+ logging.info("Custom views for {}:".format(self.db))
+ for view_name, view_details in customviews.items():
+ self.do_customview(view_name, view_details)
+
+ def do_fullview(self, view):
+ """
+ Check whether the source table exists, and if so, create the view.
+ """
+ if self.table_exists(view, self.db):
+ # If it does, create or replace the view for it.
+ logging.info("[{}] ".format(view))
+ if (self.replace_all or not self.table_exists(view, self.db_p) or
+ input('View already exists. Replace? [y/N] ').lower()
+ in ['y', 'yes']):
+ # Can't use pymysql to build this
+ self.write_execute("""
+ CREATE OR REPLACE
+ DEFINER=viewmaster
+ VIEW `{0}`.`{1}`
+ AS SELECT * FROM `{2}`.`{1}`;
+ """.format(self.db_p, view, self.db))
+ else:
+ # Some views only exist in CentralAuth, some only in MediaWiki,
+ # etc.
+ logging.debug(
+ ("Skipping full view {} on database {} as the table does not"
+ " seem to exist.")
+ .format(view, self.db)
+ )
+
+ def check_customview_source(self, view_name, source):
+ """
+ Check whether a custom view's particular source exists. If it does,
+ return the source database and table names. If not, return False.
+ """
+ match = re.match(r"^(?:(.*)\.)?([^.]+)$", source)
+ if not match:
+ raise Exception(
+ "Custom view source does not look valid! Source: {}, view: {}"
+ .format(source, view_name)
+ )
+
+ # Effectively separate a db.table source into it's parts
+ source_db, source_table = match.groups()
+
+ # If there was no DB part, assume the current DB name
+ if source_db is None:
+ source_db = self.db
+
+ if self.table_exists(source_table, source_db):
+ # If it does, take this source into account.
+ return source_db, source_table
+ else:
+ logging.debug(
+ ("Failed to find table {} in database {} as a source for"
+ " view {}")
+ .format(source_table, source_db, view_name)
+ )
+ return False
+
+ def do_customview(self, view_name, view_details):
+ """
+ Process a custom view's sources, and if they're all present, and the
+ view's limit is not bigger than the database size, call
+ create_customview.
+ """
+ if ("limit" in view_details and self.db_size is not None and
+ view_details["limit"] > self.db_size):
+ # Ignore custom views which have a limit number greater
+ # than the size ID set by the read_list calls for
+ # size.dblist above.
+ logging.debug("Too big for this database")
+ return
+
+ sources = view_details["source"]
+ if sources.__class__ is str:
+ sources = [sources]
+
+ sources_checked = []
+
+ for source in sources:
+ result = self.check_customview_source(view_name, source)
+ if result:
+ source_db, source_table = result
+ sources_checked.append(
+ "`{}`.`{}`".format(source_db, source_table)
+ )
+ else:
+ break
+
+ if len(sources) == len(sources_checked):
+
+ if (self.replace_all or
+ not self.table_exists(view_name, self.db_p) or
+ input('View already exists. Replace? [y/N] ').lower()
+ in ['y', 'yes']):
+ logging.info("[{}] ".format(view_name))
+ self.create_customview(
+ view_name,
+ view_details,
+ sources_checked
+ )
+ else:
+ # If any source was not found, ignore this view.
+ # Some views only exist in CentralAuth, some only in MediaWiki,
+ # etc.
+ logging.debug(
+ ("Skipping custom view {} on database {} as not all sources"
+ " were verified.\nSources: {}\nVerified sources: {}")
+ .format(
+ view_name,
+ self.db,
+ str(view_details["source"]),
+ str(sources_checked)
+ )
+ )
+
+ def create_customview(self, view_name, view_details, sources):
+ """
+ Creates or replaces a custom view from it's sources.
+ """
+ query = """
+ CREATE OR REPLACE
+ DEFINER=viewmaster
+ VIEW `{}`.`{}`
+ AS {}
+ FROM {}
+ """.format(
+ self.db_p,
+ view_name,
+ view_details["view"],
+ ",".join(sources)
+ )
+
+ if "where" in view_details:
+ query += " WHERE {}\n".format(view_details["where"])
+ if "group" in view_details:
+ # Technically nothing is using this at the moment...
+ query += " GROUP BY {}\n".format(view_details["group"])
+
+ query += ";"
+
+ # Can't use pymysql to build this
+ self.write_execute(query)
+
+
+def do_dbhost(dry_run, replace_all, dbhost, dbport, config, dbs, customviews):
+ """
+ Handle setting up a connection to a dbhost, then go through every database
+ to start the process of creating views there.
+ """
+ dbh = pymysql.connect(
+ host=dbhost,
+ port=dbport,
+ user=config["mysql_user"],
+ passwd=config["mysql_password"],
+ charset="utf8"
+ )
+ with dbh.cursor() as cursor:
+ logging.info("Connected to {}:{}...".format(dbhost, dbport))
+ cursor.execute("SET NAMES 'utf8';")
+ for db, db_info in dbs.items():
+ skip = False
+ for bad_flag in ["deleted", "private"]:
+ if db_info.get(bad_flag, False):
+ logging.debug(
+ "Skipping database {} because it's marked as {}."
+ .format(db, bad_flag)
+ )
+ skip = True
+
+ if skip:
+ continue
+
+ db_size = None
+ if "size" in db_info:
+ db_size = db_info["size"]
+
+ ops = SchemaOperations(dry_run, replace_all, db, db_size, cursor)
+ ops.execute(config["fullviews"], customviews)
+
+if __name__ == "__main__":
+ logging.getLogger().setLevel(logging.INFO)
+
+ argparser = argparse.ArgumentParser(
+ "maintain-replicas",
+ description="Maintain labs sanitized views of replica databases"
+ )
+ argparser.add_argument(
+ "--config-location",
+ help="Path to find the configuration file",
+ default="config.json"
+ )
+ argparser.add_argument(
+ "--dry-run",
+ help=("Give this parameter if you don't want the script to actually"
+ " make changes."),
+ action="store_true"
+ )
+ argparser.add_argument(
+ "--replace-all",
+ help=("Give this parameter if you don't want the script to prompt"
+ " before replacing views."),
+ action="store_true"
+ )
+ argparser.add_argument(
+ "--databases",
+ help=("Specify database(s) to work on, instead of all. Multiple"
+ " values can be given space-separated."),
+ nargs="+"
+ )
+ args = argparser.parse_args()
+ with open(args.config_location) as f:
+ config = json.load(f)
+
+ # Hacks
+ safelog = ("log_type IN ('" +
+ "', '".join(config["logging_whitelist"]) +
+ "')")
+ customviews = config["customviews"]
+ customviews["logging"]["where"] = safelog
+ customviews["logging_logindex"]["where"] = ("(log_deleted&1)=0 and " +
+ safelog)
+ customviews["logging_userindex"]["where"] = ("(log_deleted&4)=0 and " +
+ safelog)
+
+ if os.path.isdir('mediawiki-config'):
+ logging.info("Running git pull in mediawiki-config")
+ subprocess.call(["git", "pull"], cwd="mediawiki-config")
+ else:
+ logging.info("Running git clone for mediawiki-config")
+ subprocess.call([
+ "git",
+ "clone",
+ "https://gerrit.wikimedia.org/r/p/operations/mediawiki-config.git"
+ ])
+
+ with open("mediawiki-config/dblists/all.dblist") as f:
+ all_available_dbs = f.read().splitlines()
+ all_available_dbs.append("centralauth")
+ if args.databases:
+ dbs = {}
+ for db in args.databases:
+ if db in all_available_dbs:
+ dbs[db] = {}
+ else:
+ logging.info(
+ "Ignoring database {} which doesn't appear to exist."
+ .format(db)
+ )
+
+ if not len(dbs.keys()):
+ logging.critical("No databases specified exist.")
+ else:
+ dbs = {db: {} for db in all_available_dbs}
+
+ def read_list(fname, prop, val):
+ with open("mediawiki-config/dblists/{}.dblist".format(fname)) as f:
+ for db in f.read().splitlines():
+ if db in dbs:
+ dbs[db][prop] = val
+
+ # Reads various .dblist files to store information about specific
+ # databases. The first line, for example, reads deleted.dblist, and for
+ # each listed database name sets dbs[db_name]["deleted"] = True
+ read_list("deleted", "deleted", True)
+ read_list("private", "private", True)
+ read_list("small", "size", 1)
+ read_list("medium", "size", 2)
+ read_list("large", "size", 3)
+
+ logging.info("Got all necessary info, starting to connect to slices")
+ for dbhost, dbport in config["slices"]:
+ do_dbhost(
+ args.dry_run,
+ args.replace_all,
+ dbhost,
+ dbport,
+ config,
+ dbs,
+ customviews
+ )
+
+ logging.info("All done.")
diff --git a/tox.ini b/tox.ini
index 3299fea..02bfcd5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -42,7 +42,7 @@
compare-puppet-catalogs/*,
fwconfigtool/*,
geturls/*,
- maintain-replicas/*,
+ maintain-replicas/maintain-meta_p.py,
swift-synctool/*,
swiftcleaner/*,
swiftrepl/*,
--
To view, visit https://gerrit.wikimedia.org/r/295607
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Icc51fc9a23f2fcf4c19c7adc092662d7122471e1
Gerrit-PatchSet: 31
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: Alex Monk <[email protected]>
Gerrit-Reviewer: Alex Monk <[email protected]>
Gerrit-Reviewer: Andrew Bogott <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: Chasemp <[email protected]>
Gerrit-Reviewer: Coren <[email protected]>
Gerrit-Reviewer: Halfak <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Ori.livneh <[email protected]>
Gerrit-Reviewer: Rush <[email protected]>
Gerrit-Reviewer: Volans <[email protected]>
Gerrit-Reviewer: Yuvipanda <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits