Elukey has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/356383 )
Change subject: role::mariadb::analytics::custom_repl_slave: add eventlogging_cleaner.py ...................................................................... role::mariadb::analytics::custom_repl_slave: add eventlogging_cleaner.py This script implements the purge/sanitization policies outlined in T108850. Unit tests have been added at the bottom of the file with instructions about how to run them properly. Bug: T108850 Change-Id: I33e312cd39a9860c895897fcb90ed23820ca4dff --- A modules/role/files/mariadb/eventlogging_cleaner.py A modules/role/files/mariadb/eventlogging_purging_whitelist.tsv M modules/role/manifests/mariadb.pp 3 files changed, 1,858 insertions(+), 0 deletions(-) Approvals: Elukey: Looks good to me, approved jenkins-bot: Verified diff --git a/modules/role/files/mariadb/eventlogging_cleaner.py b/modules/role/files/mariadb/eventlogging_cleaner.py new file mode 100644 index 0000000..58eee6d --- /dev/null +++ b/modules/role/files/mariadb/eventlogging_cleaner.py @@ -0,0 +1,820 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +This script enforces the Analytics data retention guidelines outlined in: +https://wikitech.wikimedia.org/wiki/Analytics/Systems/EventLogging/Data_retention_and_auto-purging + +The script reads a whitelist (TSV file) with the following format for each line: + +Tablename\tfield +Tablename\tfield2 +Tablename2\tfield_bla +[...] + +The script works in the following way: for each table in the EventLogging database, +it looks for any reference of it in the whitelist. If none is found, it means that +there is no interest of preserving any kind of non-sensitive data, therefore +the retention policy is applied simply deleting all the rows matching the time +delta provided in input. 
If one or more references are found, it means that some +fields of a given table need to be preserved for historical analytics, therefore +the script will execute update commands to set to NULL all the non-whitelisted fields +belonging to rows matching the time delta provided in input. + +Important notes: +1) The script is meant to run on the same host in which the database that needs + to be cleaned is running. The script will try basic authentication + if a DB username/password is provided by the user in a my.cnf configuration + file (the conf file needs to have a [client] section with 'user' and 'password'). +2) If a table is listed in the whitelist, then some of its fields are automatically + added to it (see COMMON_PERSISTENT_FIELDS). This ensures that important fields + like timestamp or primary keys are preserved. +3) The script runs updates/deletes in batches to avoid blocking the database for too + long creating contention with other write operations (like inserts). +""" + +import argparse +import collections +import configparser +import csv +import logging +import os +import re +import sys +import time +import unittest +import uuid + +from datetime import datetime, timedelta +from unittest.mock import MagicMock, Mock, call, patch + +import pymysql + +DATE_FORMAT = '%Y%m%d%H%M%S' + +# Fields that are always present due to the EventLogging Capsule. +# These ones are automatically whitelisted due to their importance. +COMMON_PERSISTENT_FIELDS = ('id', 'uuid', 'timestamp') + +log = logging.getLogger(__name__) + + +class Database(object): + + def __init__(self, db_host, db_name, db_user, db_password=None, + db_port=None, unix_socket=None): + self.db_host = db_host + self.db_name = db_name + + if (db_password is not None or db_port is not None) and unix_socket is not None: + raise RuntimeError( + "Can not create a database connection. Specify either db_port and db_password " + "or unix_socket. You can not specify both at the same time." 
+ ) + + self.connection = pymysql.connect( + host=db_host, + port=db_port, + db=db_name, + user=db_user, + password=db_password, + unix_socket=unix_socket, + autocommit=True, + charset='utf8', + use_unicode=True, + ) + + def execute(self, command, params=None, dry_run=False): + """ + Sends a single sql command to the server instance, + returns metadata about the execution and the resulting data. + """ + result = { + "query": command, + "args": params, + "host": self.db_host, + "database": self.db_name, + } + if dry_run: + log.info( + "(DRY-RUN) Executing command: %s with params: %s", command, params + ) + result.update({ + "success": True, + "fields": [], + "rows": [], + "numrows": 0, + }) + return result + + try: + with self.connection.cursor() as cursor: + log.info("Executing command %s with params %s", command, params) + cursor.execute(command, params) + + fields = None + rows = None + if cursor.rowcount > 0: + rows = cursor.fetchall() + fields = ( + [] if not cursor.description + else [x[0] for x in cursor.description] + ) + numrows = cursor.rowcount + + result.update({ + "success": True, + "fields": fields, + "rows": rows, + "numrows": numrows + }) + + except (pymysql.err.ProgrammingError, pymysql.err.OperationalError) as e: + log.exception('An error as occurred while executing the SQL command') + result.update({ + "success": False, + "errno": e.args[0], + "errmsg": e.args[1] + }) + return result + + def get_all_tables(self): + """ + Returns all the tables that holds EventLogging data. + The log database may hold tables from other services like EventBus, + so in this function we use a SQL query that checks for two attributes: + timestamp and any event_*. 
+ """ + command = ( + "SELECT " + " table_name, " + " SUM(IF(column_name = 'timestamp', 1, 0)) AS has_timestamp_field, " + " SUM(IF(column_name LIKE 'event_%', 1, 0)) AS event_field_count " + "FROM information_schema.columns " + "WHERE table_schema = 'log' " + "GROUP BY table_name " + "HAVING " + "has_timestamp_field = 1 AND " + "event_field_count > 0" + ) + result = self.execute(command) + if 'rows' not in result or not result['rows']: + log.error('No tables found in database ' + self.db_name) + return [] + return [row[0] for row in result['rows']] + + def get_table_fields(self, table): + command = "DESCRIBE {}".format(table) + result = self.execute(command) + return [row[0] for row in result['rows']] + + def close_connection(self): + try: + self.connection.close() + except (pymysql.err.ProgrammingError, + pymysql.err.OperationalError): + log.exception("Failed to close the connection to the DB") + + +class Terminator(object): + + def __init__(self, database, whitelist, newer_than, older_than, + batch_size, sleep_between_batches, dry_run=False): + self.reference_time = datetime.utcnow() + self.database = database + self.whitelist = whitelist + self.start = self.relative_ts(newer_than) + self.end = self.relative_ts(older_than) + self.batch_size = batch_size + self.sleep_between_batches = sleep_between_batches + self.dry_run = dry_run + + def relative_ts(self, days): + return (self.reference_time - timedelta(days=days)).strftime(DATE_FORMAT) + + def purge(self, table): + """ + Drop all the rows in a given table with timestamp between + self.start and self.end. 
+ """ + command = ( + "DELETE FROM `{}` " + "WHERE timestamp >= %(start_ts)s AND timestamp < %(end_ts)s " + "LIMIT %(batch_size)s".format(table) + ) + params = { + 'start_ts': self.start, + 'end_ts': self.end, + 'batch_size': self.batch_size, + } + result = self.database.execute(command, params, dry_run=self.dry_run) + # In case the deleted rows number is not equal to the batch size, + # it means that we have completed the last batch so we can avoid + # an extra loop cycle. + while result['numrows'] == self.batch_size: + result = self.database.execute(command, params, dry_run=self.dry_run) + time.sleep(self.sleep_between_batches) + + def _get_uuids_and_last_ts(self, table, start_ts, override_batch_size=None): + """ + Return the first <batch_size> uuids of the events between start_ts + and self.end. Also return the timestamp of the last of those events. + NOTE: If there exist several events that share the last timestamp, + it might be that some of them are listed in the uuid batch, and some + others aren't (do not fit in the batch size limit). In the next iteration + start_ts will be this iteration's last_ts, and so the script might + re-purge some events, which is OK, because the outcome does not change. + """ + batch_size = override_batch_size or self.batch_size + command = ( + "SELECT timestamp, uuid from {} WHERE timestamp >= %(start_ts)s " + "AND timestamp < %(end_ts)s ORDER BY timestamp LIMIT %(batch_size)s" + .format(table) + ) + params = { + 'start_ts': start_ts, + 'end_ts': self.end, + 'batch_size': batch_size, + } + result = self.database.execute(command, params, self.dry_run) + if result['rows']: + last_ts = result['rows'][-1][0] + if last_ts == start_ts: + if batch_size > 4 * self.batch_size: + raise RuntimeError( + "The number of events with the same timestamp ({}) " + "for table {} exceeded 4 times the configured batch size. " + "Aborting as a precautionary measure." 
+ .format(start_ts, table) + ) + log.warning("All events in the batch have the same timestamp ({}) for table {}. " + "Growing the batch size to {}." + .format(start_ts, table, 2 * batch_size)) + return self._get_uuids_and_last_ts(table, start_ts, + override_batch_size=batch_size*2) + uuids = [x[1] for x in result['rows']] + return (uuids, last_ts) + else: + return ([], None) + + def sanitize(self, table): + """ + Set all the fields not in the whitelist (for a given table) to NULL. + The schema_prefix is needed since the whitelist contains only EventLogging + schema/table prefixes. + """ + # Get the table's whitelist prefix to retrieve the list of fields to save + # from the whitelist + table_prefix = table.split('_')[0] + # Sanity check + if table_prefix not in self.whitelist: + raise RuntimeError( + "Sanitize has been called for table {}, but its " + "prefix {} is not in the whitelist. Aborting as precautionary " + "measure since this error condition might indicate a bug in the code" + .format(table, table_prefix) + ) + fields = self.database.get_table_fields(table) + fields_to_keep = self.whitelist[table_prefix] + list(COMMON_PERSISTENT_FIELDS) + fields_to_purge = [f for f in fields if f not in fields_to_keep] + if not fields_to_purge: + log.warning("No fields to purge for table {}.".format(table)) + return + + values_string = ','.join([field + ' = NULL' for field in fields_to_purge]) + uuids_current_batch, last_ts = self._get_uuids_and_last_ts(table, self.start) + command_template = ( + "UPDATE {0} " + "SET {1} " + "WHERE uuid IN ({{}})" + ).format(table, values_string) + + while uuids_current_batch: + uuids_no = len(uuids_current_batch) + if uuids_no > self.batch_size: + log.warning("The number of uuids to sanitize {} is bigger " + "than the batch size {}, this condition should not " + "be possible, please review the code/data. 
" + .format(str(uuids_no), str(self.batch_size))) + + uuids_current_batch_escaped = ["'" + x + "'" for x in uuids_current_batch] + result = self.database.execute( + command_template.format(",".join(uuids_current_batch_escaped)), + dry_run=self.dry_run + ) + if result['numrows'] > uuids_no: + log.error("The number of uuids to sanitize {} is lower " + "than the number of updated rows in this batch {}. " + "This is definitely an error in the code, please review it." + .format(uuids_no, result['numrows'])) + raise RuntimeError('Sanitization stopped as precautionary step.') + + if uuids_no < self.batch_size: + # Avoid an extra SQL query to the database if the number of + # uuids returned are less than BATCH_SIZE, since this value + # means that we have already reached the last batch of uuids + # to sanitize. + uuids_current_batch = [] + else: + uuids_current_batch, last_ts = self._get_uuids_and_last_ts(table, last_ts) + time.sleep(self.sleep_between_batches) + + +def check_not_valid_whitelist_table_prefixes(whitelist, tables): + """ + Return all the whitelist table prefixes that do not match any table + provided in input. + """ + not_valid_table_prefixes = [] + for table_prefix in whitelist: + if not [t for t in tables if t.startswith(table_prefix + '_')]: + not_valid_table_prefixes.append(table_prefix) + return not_valid_table_prefixes + + +def parse_whitelist(rows): + """Parse rows containing tables and their attributes to whitelist + + Returns a hashmap with the following format: + - each key is a table name + - each value is a list of whitelisted fields + { + "tableName1": ["fieldName1", "fieldName2", ...], + "tableName2": [...], + ... 
+ } + """ + whitelist_hash = collections.defaultdict(list) + allowed_tablename_format = re.compile("^[A-Za-z0-9_]+$") + allowed_fieldname_format = re.compile("^[A-Za-z0-9_.]+$") + lineno = 0 + for lineno, row in enumerate(rows): + if len(row) != 2: + raise RuntimeError('Error in the whitelist, line {}: ' + '2 elements per row allowed ' + '(tab to separate them).' + .format(lineno)) + + table_name = row[0].strip() + field_name = row[1].strip() + + if not allowed_tablename_format.match(table_name): + raise RuntimeError('Error in the whitelist, line {}: table name {} not ' + 'following the allowed format (^[A-Za-z0-9_]+$)' + .format(lineno, table_name)) + + if not allowed_fieldname_format.match(field_name): + raise RuntimeError('Error in the whitelist, line {}: field {} not ' + 'following the allowed format ' + '(^[A-Za-z0-9_.]+$)' + .format(lineno, field_name)) + + if field_name not in whitelist_hash[table_name]: + whitelist_hash[table_name].append(field_name) + else: + raise RuntimeError('Error in the whitelist, line {}: field {} ' + 'is listed multiple times.' + .format(lineno, field_name)) + + return whitelist_hash + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='EventLogging data ' + 'retention script') + parser.add_argument('--whitelist', + help='The full path of the TSV whitelist file. ' + 'Not compatible with --no-whitelist') + parser.add_argument('--no-whitelist', action='store_true', + help='Bypass any whitelist and sanitization scheme. ' + '(default: false).' 
+ 'Not compatible with --whitelist') + parser.add_argument('--dbport', default=3306, type=int, + help='The target db port (default: 3306)') + parser.add_argument('--dbname', default='log', + help='The EventLogging database name (default: log)') + parser.add_argument('--older-than', dest='older_than', default=90, type=int, + help='Delete logs older than this number of days' + ' (default: 90)') + parser.add_argument('--newer-than', dest='newer_than', default=91, type=int, + help='Delete logs newer than this number of days' + ' (default: 91)') + parser.add_argument('--dry-run', dest='dry_run', action='store_true', + help='Only print sql commands without executing them') + parser.add_argument('--logfile', dest='logfile', default=None, + help='Redirect the script\'s output to a file rather ' + 'than stdout') + parser.add_argument('--batch-size', dest='batch_size', default=1000, type=int, + help='Maximum number of DB rows to update/delete in one go.' + ' (default: 1000)') + parser.add_argument('--sleep-between-batches', dest='sleep_between_batches', + default=1, type=int, + help='Sleep time in seconds between each delete/update batch.' + ' (default: 1)') + parser.add_argument('--my-cnf', dest='my_cnf', default='/etc/my.cnf', + help='Path to the mysql configuration file. Requires ' + 'a [client] section containing user and unix_socket path ' + 'fields, or alternatively user and password (but the first ' + 'option is preferred). Default: /etc/my.cnf') + args = parser.parse_args() + + log_format = ('%(levelname)s: line %(lineno)d: %(message)s') + + if args.logfile: + logging.basicConfig( + filename=args.logfile, + level=logging.INFO, + format=log_format + ) + else: + logging.basicConfig( + stream=sys.stdout, + level=logging.INFO, + format=log_format + ) + + # Args basic checks + if args.no_whitelist and args.whitelist: + log.error( + "The parameters --whitelist and --no-whitelist can't be used together." 
+ ) + sys.exit(1) + + if not (args.no_whitelist or args.whitelist): + log.error( + "One of --whitelist and --no-whitelist needs to be used." + ) + sys.exit(1) + + if args.whitelist and not os.path.exists(args.whitelist): + log.error( + "The whitelist filepath provided ({}) does not exist" + .format(args.whitelist) + ) + sys.exit(1) + + if args.my_cnf and not os.path.exists(args.my_cnf): + log.error( + "The my_cnf filepath provided ({}) does not exist".format(args.my_cnf) + ) + sys.exit(1) + + if args.older_than < 90: + log.error( + "Attempt to delete data older than ({}) days " + "(any value less than 90 is not supported)" + .format(args.older_than) + ) + sys.exit(1) + + if args.newer_than <= args.older_than: + log.error("--newer-than must be stricly greater than --older-than") + sys.exit(1) + + try: + database = None + + # Extra sanity check to make sure that no future changes to the args + # parser will inadvertently cause data loss when deployed. + assert( + (args.whitelist and not args.no_whitelist) + or (not args.whitelist and args.no_whitelist) + ) + # Parse whitelist file + if args.whitelist: + with open(args.whitelist, 'r') as whitelist_fd: + lines = csv.reader(whitelist_fd, delimiter='\t') + whitelist = parse_whitelist(lines) + else: + whitelist = {} + + # Parse the db my.cnf config file + config = configparser.ConfigParser() + config.read(args.my_cnf) + + # Priority to the local unix socket, default to username/password + try: + unix_socket = config.get('client', 'socket') + db_user = os.getlogin() + db_password = None + except configparser.NoOptionError as e: + log.info( + "No local unix socket configured for myql, default to username/password" + ) + unix_socket = None + db_user = config.get('client', 'user') + db_password = config.get('client', 'password') + + # Connect to the database in localhost (no other option + # available). This is a design choice to simplify auth + # and to restrict the actions taken to the local db only. 
+ database = Database('localhost', args.dbname, db_user, db_password=db_password, + db_port=args.dbport, unix_socket=unix_socket) + + # Apply the retention policy to each table + tables = database.get_all_tables() + if not tables: + log.info('Forcing close, no tables on the database.') + sys.exit(1) + + # Sanity check + bad_whitelist_entries = check_not_valid_whitelist_table_prefixes(whitelist, tables) + if bad_whitelist_entries: + log.error( + "Some table prefixes in the whitelist do not match any " + "table name retrieved from the database. Please review " + "the following entries of the whitelist: %s", bad_whitelist_entries + ) + sys.exit(1) + + terminator = Terminator( + database, + whitelist, + args.newer_than, + args.older_than, + args.batch_size, + args.sleep_between_batches, + dry_run=args.dry_run + ) + + # Assumption: the whitelist contains only table prefixes, not complete + # names. For example an EL table name could be 'Echo_1234_1234', + # and the corresponding whitelist entry would be 'Echo'. + # + # Two purging methods: + # 1) if the table name does not match any table prefix contained + # in the whitelist, it means that no field + # needs to be preserved, hence the rows can just be deleted. + # 2) if the table name matches any of the table prefixes contained + # in the whitelist, it means that the rows need to be updated + # with all the fields not whitelisted set as NULL. + for table in tables: + schema_prefix = table.split('_')[0] + if schema_prefix not in whitelist: + terminator.purge(table) + else: + terminator.sanitize(table) + except Exception as e: + log.exception("Exception while running main") + sys.exit(1) + finally: + if database: + database.close_connection() + + +# ##### Tests ###### +# To run: +# python3 -m unittest eventlogging_cleaner +# +# Why are the tests embedded in this file instead of a proper Python package? 
+# +# Deploying this script via puppet was considered to be a good tradeoff between +# the need of having tests and the effort to set up a proper code structure. +# The alternative was to create a proper Python package and deploy it via scap +# or via Debian package, but it was considered overkill. +# +# ################### + + +class TestDatabase(unittest.TestCase): + + def setUp(self): + print("Test: ", self._testMethodName) + + @patch('pymysql.connect') + def test_dry_run(self, mock): + """ + Verify that the dry_run mode does not end up in any call + to the database. + """ + connection_mock = MagicMock() + connection_mock.cursor.return_value = MagicMock() + mock.return_value = connection_mock + db = Database("localhost", "log", "batman", db_password="NaNaNaNaNa", + db_port=3306) + db.execute("show tables", dry_run=True) + self.assertFalse(connection_mock.cursor.execute.called) + + +class TestParser(unittest.TestCase): + + def setUp(self): + print("Test: ", self._testMethodName) + + def test_check_not_valid_whitelist_table_prefixes(self): + """ + Test if all the whitelist table prefixes not contained in the table + list is returned correctly. + """ + tables = ['AwesomeTableBatman_1234', 'AnotherTable_5677', 'AwesomeTable_789'] + whitelist = {'AwesomeTable': ['field1', 'field2'], 'NotGood': ['field1', 'field2']} + expected_result = ['NotGood'] + result = check_not_valid_whitelist_table_prefixes(whitelist, tables) + self.assertEqual(result, expected_result) + + tables = ['AwesomeTableBatman_1234', 'AnotherTable_5677', 'AwesomeTable_789'] + whitelist = { + 'AwesomeTable': ['field1', 'field2'], + 'AwesomeTableBatman': ['field1', 'field2'] + } + result = check_not_valid_whitelist_table_prefixes(whitelist, tables) + self.assertEqual(result, []) + + def test_row_elements(self): + """ + Test basic functionality of the parser (for example the data + structure returned must have a specific format and organization). 
+ """ + rows = [["TestTable", "TestField"]] + result = parse_whitelist(rows) + expected_result = {"TestTable": ["TestField"]} + self.assertDictEqual(result, expected_result) + + rows = [["TestTable", "TestField"], ["TestTable", "TestField1"]] + result = parse_whitelist(rows) + expected_result = {"TestTable": ["TestField", "TestField1"]} + self.assertDictEqual(result, expected_result) + + rows = [["TestTable_1", "TestField"], + ["TestTable_1", "TestField2"], + ["TestTable1", "TestField1.test"]] + result = parse_whitelist(rows) + expected_result = { + "TestTable_1": ["TestField", "TestField2"], + "TestTable1": ["TestField1.test"], + } + self.assertDictEqual(result, expected_result) + + def test_parse_guards(self): + """ + Test basic input sanity checks to prevent easy mistakes + while configuring the whitelist. + """ + duplicate_rows = [["TestTable", "TestField"], ["TestTable", "TestField"]] + with self.assertRaises(RuntimeError): + parse_whitelist(duplicate_rows) + + wrong_chars_in_rows = [["TestTable.*", "TestField"], ["TestTable**", "TestField"]] + with self.assertRaises(RuntimeError): + parse_whitelist(wrong_chars_in_rows) + + wrong_chars_in_rows = [["TestTable", "TestField---"], ["TestTable", "TestField"]] + with self.assertRaises(RuntimeError): + parse_whitelist(wrong_chars_in_rows) + + too_many_el_in_rows = [["TestTable", "TestField", "NotRight"]] + with self.assertRaises(RuntimeError): + parse_whitelist(too_many_el_in_rows) + + +class TestTerminator(unittest.TestCase): + + def setUp(self): + print("Test: ", self._testMethodName) + self.database = MagicMock() + self.batch_size = 1000 + self.terminator = Terminator(self.database, {}, 120, 90, + self.batch_size, 0.1, dry_run=False) + + def test_relative_ts(self): + now = datetime.utcnow() + self.terminator.reference_time = now + result = self.terminator.relative_ts(30) + expected_result = (now - timedelta(days=30)).strftime(DATE_FORMAT) + self.assertEqual(result, expected_result) + + def test_purge(self): + 
self.terminator.database.execute.side_effect = [{'numrows': self.batch_size}, + {'numrows': self.batch_size}, + {'numrows': 0}] + + expected_sql = ( + "DELETE FROM `AwesomeTable` WHERE timestamp >= %(start_ts)s " + "AND timestamp < %(end_ts)s LIMIT %(batch_size)s" + ) + expected_params = { + 'start_ts': self.terminator.start, + 'end_ts': self.terminator.end, + 'batch_size': self.terminator.batch_size, + } + self.terminator.purge("AwesomeTable") + self.terminator.database.execute.assert_has_calls([ + call(expected_sql, expected_params, dry_run=False), + call(expected_sql, expected_params, dry_run=False), + call(expected_sql, expected_params, dry_run=False) + ]) + + # The Database execute method only catches pymysql specific exception, + # returning a empty result. Any other exception returned is considered + # not expected and the terminator class does not try to catch anything. + self.terminator.database.execute.side_effect = RuntimeError("This is a bad exception") + with self.assertRaises(RuntimeError): + self.terminator.purge("AwesomeTable") + + def test_get_uuids_and_last_ts(self): + random_uuids = [] + for ts in range(400): + random_uuids.append((str(ts).zfill(3), str(uuid.uuid4()))) + self.terminator.database.execute.side_effect = [{'rows': random_uuids}] + result = self.terminator._get_uuids_and_last_ts("AwesomeTable", 10) + expected_result = ([x[1] for x in random_uuids], random_uuids[-1][0]) + self.assertEqual(result, expected_result) + + def test_sanitize_one_batch(self): + """ + Sanitize called on a number of uuids less than one batch size + """ + self.terminator.database.get_table_fields.return_value = ['id', 'uuid', 'field1', + 'field2', 'field3', 'field4'] + BATCH_SIZE_TEST = 400 # less than terminator's batch size + random_uuids = [str(uuid.uuid4()) for r in range(BATCH_SIZE_TEST)] + self.terminator._get_uuids_and_last_ts = Mock( + return_value=(random_uuids, '20010101000000')) + self.terminator.database.execute.return_value = {'numrows': 
BATCH_SIZE_TEST} + self.terminator.whitelist = {'AwesomeTable': ['field1', 'field2']} + expected_fields_to_sanitize = ','.join( + [field + ' = NULL' for field in ['field3', 'field4']] + ) + expected_uuids_in_where = ','.join(["'" + x + "'" for x in random_uuids]) + command_template = ( + "UPDATE AwesomeTable " + "SET {0} " + "WHERE uuid IN ({{}})" + ).format(expected_fields_to_sanitize) + expected_command = command_template.format(expected_uuids_in_where) + self.terminator.sanitize("AwesomeTable") + self.terminator.database.execute.assert_called_once_with( + expected_command, dry_run=False) + + def test_sanitize_multi_batches(self): + """ + Sanitize called on a number of uuids that requires multiple batches. + This test ensure that the update statements are executed in the right + order and in the right number. + """ + self.terminator.database.get_table_fields.return_value = ['id', 'uuid', 'field1', + 'field2', 'field3', 'field4'] + random_uuids = [str(uuid.uuid4()) for r in range(self.batch_size)] + random_uuids2 = [str(uuid.uuid4()) for r in range(5)] + self.terminator._get_uuids_and_last_ts = Mock(side_effect=[ + (random_uuids, '20170101000000'), + (random_uuids2, '20170102000000') + ]) + self.terminator.database.execute.side_effect = [{'numrows': self.batch_size}, + {'numrows': 5}] + self.terminator.whitelist = {'AwesomeTable': ['field1', 'field2']} + expected_fields_to_sanitize = ','.join( + [field + ' = NULL' for field in ['field3', 'field4']] + ) + expected_uuids_in_where_1 = ','.join(["'" + x + "'" for x in random_uuids]) + expected_uuids_in_where_2 = ','.join(["'" + x + "'" for x in random_uuids2]) + command_template = ( + "UPDATE AwesomeTable " + "SET {0} " + "WHERE uuid IN ({1})" + ).format(expected_fields_to_sanitize, '{}') + expected_command1 = command_template.format(expected_uuids_in_where_1) + expected_command2 = command_template.format(expected_uuids_in_where_2) + self.terminator.sanitize("AwesomeTable") + 
self.terminator.database.execute.assert_has_calls([ + call(expected_command1, dry_run=False), + call(expected_command2, dry_run=False), + ]) + + def test_sanitize_input_error_condition(self): + """ + The table name that the sanitize will work on needs to have its prefix + contained in the whitelist. + """ + self.terminator.whitelist = {'AwesomeTable': ['field1', 'field2']} + error_msg = ( + 'Sanitize has been called for table NotAwesomeTable_1234, ' + 'but its prefix NotAwesomeTable' + ) + with self.assertRaisesRegex(RuntimeError, error_msg): + self.terminator.sanitize("NotAwesomeTable_1234") + + def test_sanitize_multi_batches_error_condition(self): + """ + The number of updated rows is bigger than the number of uuids in a batch. + """ + self.terminator.database.get_table_fields.return_value = ['id', 'uuid', 'field1', + 'field2', 'field3', 'field4'] + random_uuids = [str(uuid.uuid4()) + "'" for r in range(self.batch_size)] + self.terminator._get_uuids_and_last_ts = Mock( + side_effect=[(random_uuids, '20170101000000')]) + self.terminator.database.execute.side_effect = [{'numrows': self.batch_size * 2}] + self.terminator.whitelist = {'AwesomeTable': ['field1', 'field2']} + with self.assertRaisesRegex(RuntimeError, 'Sanitization stopped as precautionary step.'): + self.terminator.sanitize("AwesomeTable") + + def test_sanitize_table_without_fields_to_purge(self): + """ + The table has all its fields white-listed or public by default. 
+ """ + self.terminator.database.get_table_fields.return_value = ['id', 'uuid', 'field1', + 'field2', 'field3', 'field4'] + self.terminator.whitelist = {'AwesomeTable': ['field1', 'field2', 'field3', 'field4']} + self.terminator._get_uuids_and_last_ts = MagicMock() + self.terminator.sanitize("AwesomeTable") + self.assertFalse(self.terminator._get_uuids_and_last_ts.called) diff --git a/modules/role/files/mariadb/eventlogging_purging_whitelist.tsv b/modules/role/files/mariadb/eventlogging_purging_whitelist.tsv new file mode 100644 index 0000000..62d6f0c --- /dev/null +++ b/modules/role/files/mariadb/eventlogging_purging_whitelist.tsv @@ -0,0 +1,1027 @@ +DiacriticsPoll webHost +DiacriticsPoll wiki +DiacriticsPoll event_country +DiacriticsPoll event_text +DiacriticsVisibility webHost +DiacriticsVisibility wiki +DiacriticsVisibility event_country +DiacriticsVisibility event_issues +Echo clientValidated +Echo event_deliveryMethod +Echo event_eventSource +Echo event_notificationGroup +Echo event_notificationType +Echo event_revisionId +Echo event_sender +Echo event_version +_EchoInteraction clientValidated +EchoInteraction clientValidated +_EchoInteraction event_action +EchoInteraction event_action +_EchoInteraction event_context +EchoInteraction event_context +_EchoInteraction event_mobile +EchoInteraction event_mobile +_EchoInteraction event_notificationType +EchoInteraction event_notificationType +_EchoInteraction event_version +EchoInteraction event_version +_EchoInteraction isTruncated +EchoInteraction isTruncated +_EchoInteraction webHost +EchoInteraction webHost +_EchoInteraction wiki +EchoInteraction wiki +Echo isTruncated +EchoMail clientValidated +EchoMail event_emailDeliveryMode +EchoMail event_version +EchoMail isTruncated +EchoMail webHost +EchoMail wiki +Echo webHost +Echo wiki +Edit clientValidated +Edit event_action +Edit event_action.abort.mechanism +Edit event_action.abort.timing +Edit event_action.abort.type +Edit event_action.init.mechanism +Edit 
event_action.init.timing +Edit event_action.init.type +Edit event_action.ready.timing +Edit event_action.saveAttempt.timing +Edit event_action.saveFailure.message +Edit event_action.saveFailure.timing +Edit event_action.saveFailure.type +Edit event_action.saveIntent.timing +Edit event_action.saveSuccess.timing +Edit event_editingSessionId +Edit event_editor +Edit event_integration +Edit event_latency +Edit event_mediawiki.version +Edit event_page.id +Edit event_pageId +Edit event_page.length +Edit event_pageName +Edit event_page.ns +Edit event_pageNs +Edit event_page.revid +Edit event_page.title +Edit event_pageViewSessionId +Edit event_platform +Edit event_revId +Edit event_user.class +Edit event_user.editCount +Edit event_user.id +Edit event_userId +Edit event_version +Edit isTruncated +Edit webHost +Edit wiki +EditorActivation webHost +EditorActivation wiki +EditorActivation event_month +EditorActivation event_userId +FlowReplies clientValidated +FlowReplies event_action +FlowReplies event_entrypoint +FlowReplies event_funnelId +FlowReplies event_isAnon +FlowReplies event_pageNs +FlowReplies isTruncated +FlowReplies webHost +FlowReplies wiki +GatherClicks event_errorText +GatherClicks event_eventName +GatherClicks event_mobileMode +GatherClicks event_skin +GatherClicks event_source +GatherClicks event_userEditCount +GatherClicks webHost +GatherClicks wiki +GatherClicks editCountBucket +GatherClicks event_editCountBucket +GatherFlags event_collectionId +GatherFlags event_userEditCount +GatherFlags event_userGroups +GatherFlags webHost +GatherFlags wiki +GatherFlags editCountBucket +GatherFlags event_editCountBucket +GettingStartedNavbarNoArticle clientValidated +GettingStartedNavbarNoArticle event_funnel +GettingStartedNavbarNoArticle event_version +GettingStartedNavbarNoArticle isTruncated +GettingStartedNavbarNoArticle webHost +GettingStartedNavbarNoArticle wiki +GettingStartedOnRedirect clientValidated +GettingStartedOnRedirect event_action 
+GettingStartedOnRedirect event_bucket +GettingStartedOnRedirect event_funnel +GettingStartedOnRedirect event_isEditable +GettingStartedOnRedirect event_isNavbarVisible +GettingStartedOnRedirect event_pageId +GettingStartedOnRedirect event_pageNS +GettingStartedOnRedirect event_revId +GettingStartedOnRedirect event_source +GettingStartedOnRedirect event_userId +GettingStartedOnRedirect event_version +GettingStartedOnRedirect isTruncated +GettingStartedOnRedirect webHost +GettingStartedOnRedirect wiki +GettingStartedRedirectImpression clientValidated +GettingStartedRedirectImpression event_action +GettingStartedRedirectImpression event_ctaType +GettingStartedRedirectImpression event_currentRevId +GettingStartedRedirectImpression event_isEditable +GettingStartedRedirectImpression event_pageId +GettingStartedRedirectImpression event_pageNS +GettingStartedRedirectImpression event_userId +GettingStartedRedirectImpression isTruncated +GettingStartedRedirectImpression webHost +GettingStartedRedirectImpression wiki +GuidedTourButtonClick clientValidated +GuidedTourButtonClick event_action +GuidedTourButtonClick event_label +GuidedTourButtonClick event_labelKey +GuidedTourButtonClick event_sessionToken +GuidedTourButtonClick event_step +GuidedTourButtonClick event_tour +GuidedTourButtonClick event_userId +GuidedTourButtonClick event_userText +GuidedTourButtonClick isTruncated +GuidedTourButtonClick webHost +GuidedTourButtonClick wiki +GuidedTour clientValidated +GuidedTour event_action +GuidedTour event_eventId +GuidedTour event_lastGuiderId +GuidedTour event_step +GuidedTour event_tour +GuidedTour event_tourName +GuidedTour event_userId +GuidedTourExited clientValidated +GuidedTourExited event_sessionToken +GuidedTourExited event_step +GuidedTourExited event_tour +GuidedTourExited event_userId +GuidedTourExited event_userText +GuidedTourExited isTruncated +GuidedTourExited webHost +GuidedTourExited wiki +GuidedTourExternalLinkActivation clientValidated 
+GuidedTourExternalLinkActivation event_href +GuidedTourExternalLinkActivation event_label +GuidedTourExternalLinkActivation event_sessionToken +GuidedTourExternalLinkActivation event_step +GuidedTourExternalLinkActivation event_tour +GuidedTourExternalLinkActivation event_userId +GuidedTourExternalLinkActivation event_userText +GuidedTourExternalLinkActivation isTruncated +GuidedTourExternalLinkActivation webHost +GuidedTourExternalLinkActivation wiki +GuidedTourGuiderHidden clientValidated +GuidedTourGuiderHidden event_sessionToken +GuidedTourGuiderHidden event_step +GuidedTourGuiderHidden event_tour +GuidedTourGuiderHidden event_userId +GuidedTourGuiderHidden event_userText +GuidedTourGuiderHidden isTruncated +GuidedTourGuiderHidden webHost +GuidedTourGuiderHidden wiki +GuidedTourGuiderImpression clientValidated +GuidedTourGuiderImpression event_sessionToken +GuidedTourGuiderImpression event_step +GuidedTourGuiderImpression event_tour +GuidedTourGuiderImpression event_userId +GuidedTourGuiderImpression event_userText +GuidedTourGuiderImpression isTruncated +GuidedTourGuiderImpression webHost +GuidedTourGuiderImpression wiki +GuidedTourInternalLinkActivation clientValidated +GuidedTourInternalLinkActivation event_label +GuidedTourInternalLinkActivation event_pageName +GuidedTourInternalLinkActivation event_sessionToken +GuidedTourInternalLinkActivation event_step +GuidedTourInternalLinkActivation event_tour +GuidedTourInternalLinkActivation event_userId +GuidedTourInternalLinkActivation event_userText +GuidedTourInternalLinkActivation isTruncated +GuidedTourInternalLinkActivation webHost +GuidedTourInternalLinkActivation wiki +GuidedTour isTruncated +GuidedTour webHost +GuidedTour wiki +MediaWikiInstallPingback webHost +MediaWikiInstallPingback wiki +MediaWikiInstallPingback event_MediaWiki +MediaWikiInstallPingback event_OS +MediaWikiInstallPingback event_PHP +MediaWikiInstallPingback event_arch +MediaWikiInstallPingback event_curl +MediaWikiInstallPingback 
event_database +MediaWikiInstallPingback event_machine +MediaWikiInstallPingback event_webServer +MobileOptionsTracking clientValidated +MobileOptionsTracking event_action +MobileOptionsTracking event_alpha +MobileOptionsTracking event_beta +MobileOptionsTracking event_errorText +MobileOptionsTracking event_images +MobileOptionsTracking isTruncated +MobileOptionsTracking webHost +MobileOptionsTracking wiki +MobileWebBrowse webHost +MobileWebBrowse wiki +MobileWebBrowse event_action +MobileWebBrowse event_article +MobileWebBrowse event_articleIndex +MobileWebBrowse event_mobileMode +MobileWebBrowse event_tag +MobileWebClickTracking clientValidated +MobileWebClickTracking event_mobileMode +MobileWebClickTracking event_name +MobileWebClickTracking event_userEditCount +MobileWebClickTracking isTruncated +MobileWebClickTracking webHost +MobileWebClickTracking wiki +MobileWebClickTracking editCountBucket +MobileWebClickTracking event_editCountBucket +MobileWebCta clientValidated +MobileWebCta event_campaign +MobileWebCta event_campaignStep +MobileWebCta event_mobileMode +MobileWebCta event_status +MobileWebCta event_userEditCount +MobileWebCta isTruncated +MobileWebCta webHost +MobileWebCta wiki +MobileWebCta editCountBucket +MobileWebCta event_editCountBucket +MobileWebDiffClickTracking clientValidated +MobileWebDiffClickTracking event_mobileMode +MobileWebDiffClickTracking event_name +MobileWebDiffClickTracking event_userEditCount +MobileWebDiffClickTracking isTruncated +MobileWebDiffClickTracking webHost +MobileWebDiffClickTracking wiki +MobileWebDiffClickTracking editCountBucket +MobileWebDiffClickTracking event_editCountBucket +MobileWebEditing clientValidated +MobileWebEditing event_action +MobileWebEditing event_editor +MobileWebEditing event_errorText +MobileWebEditing event_funnel +MobileWebEditing event_isTestA +MobileWebEditing event_mobileMode +MobileWebEditing event_namespace +MobileWebEditing event_pageId +MobileWebEditing event_revId +MobileWebEditing 
event_section +MobileWebEditing event_token +MobileWebEditing event_userEditCount +MobileWebEditing isTruncated +MobileWebEditing webHost +MobileWebEditing wiki +MobileWebInfobox clientValidated +MobileWebInfobox event_height +MobileWebInfobox event_in +MobileWebInfobox event_infoboxLength +MobileWebInfobox event_out +MobileWebInfobox event_start +MobileWebInfobox event_wasInteraction +MobileWebInfobox event_width +MobileWebInfobox isTruncated +MobileWebInfobox webHost +MobileWebInfobox wiki +MobileWebLanguageSwitcher webHost +MobileWebLanguageSwitcher wiki +MobileWebLanguageSwitcher event_beaconCapable +MobileWebLanguageSwitcher event_event +MobileWebLanguageSwitcher event_exitModal +MobileWebLanguageSwitcher event_funnelToken +MobileWebLanguageSwitcher event_languageButtonTappedBucket +MobileWebLanguageSwitcher event_languageButtonVersion +MobileWebLanguageSwitcher event_languageCount +MobileWebLanguageSwitcher event_languageOverlayVersion +MobileWebLanguageSwitcher event_languageTapped +MobileWebLanguageSwitcher event_mobileMode +MobileWebLanguageSwitcher event_positionOfLanguageTapped +MobileWebLanguageSwitcher event_primaryLanguageOfUser +MobileWebLanguageSwitcher event_searchInputHasQuery +MobileWebMainMenuClickTracking clientValidated +MobileWebMainMenuClickTracking event_mobileMode +MobileWebMainMenuClickTracking event_name +MobileWebMainMenuClickTracking event_userEditCount +MobileWebMainMenuClickTracking isTruncated +MobileWebMainMenuClickTracking webHost +MobileWebMainMenuClickTracking wiki +MobileWebMainMenuClickTracking editCountBucket +MobileWebMainMenuClickTracking event_editCountBucket +MobileWebSearch event_action +MobileWebSearch event_clickIndex +MobileWebSearch event_numberOfResults +MobileWebSearch event_platform +MobileWebSearch event_platformVersion +MobileWebSearch event_resultSetType +MobileWebSearch event_searchSessionToken +MobileWebSearch event_timeOffsetSinceStart +MobileWebSearch event_timeToDisplayResults +MobileWebSearch 
event_userSessionToken +MobileWebSearch webHost +MobileWebSearch wiki +MobileWebSectionUsage webHost +MobileWebSectionUsage wiki +MobileWebSectionUsage event_eventName +MobileWebSectionUsage event_hasServiceWorkerSupport +MobileWebSectionUsage event_isTablet +MobileWebSectionUsage event_isTestA +MobileWebSectionUsage event_namespace +MobileWebSectionUsage event_pageId +MobileWebSectionUsage event_section +MobileWebSectionUsage event_sectionCount +MobileWebSectionUsage event_sessionId +MobileWebUIClickTracking clientValidated +MobileWebUIClickTracking event_mobileMode +MobileWebUIClickTracking event_name +MobileWebUIClickTracking event_userEditCount +MobileWebUIClickTracking isTruncated +MobileWebUIClickTracking webHost +MobileWebUIClickTracking wiki +MobileWebUIClickTracking editCountBucket +MobileWebUIClickTracking event_editCountBucket +MobileWebUploads clientValidated +MobileWebUploads event_action +MobileWebUploads event_errorText +MobileWebUploads event_funnel +MobileWebUploads event_isEditable +MobileWebUploads event_mobileMode +MobileWebUploads event_pageId +MobileWebUploads event_token +MobileWebUploads isTruncated +MobileWebUploads webHost +MobileWebUploads wiki +MobileWebWatching event_funnel +MobileWebWatching event_isWatched +MobileWebWatching event_mobileMode +MobileWebWatching event_userEditCount +MobileWebWatching webHost +MobileWebWatching wiki +MobileWebWatching editCountBucket +MobileWebWatching event_editCountBucket +MobileWebWatchlistClickTracking clientValidated +MobileWebWatchlistClickTracking event_mobileMode +MobileWebWatchlistClickTracking event_name +MobileWebWatchlistClickTracking event_userEditCount +MobileWebWatchlistClickTracking isTruncated +MobileWebWatchlistClickTracking webHost +MobileWebWatchlistClickTracking wiki +MobileWebWatchlistClickTracking editCountBucket +MobileWebWatchlistClickTracking event_editCountBucket +MobileWebWikiGrok clientValidated +MobileWebWikiGrokError clientValidated +MobileWebWikiGrokError event_error 
+MobileWebWikiGrokError event_isLoggedIn +MobileWebWikiGrokError event_mobileMode +MobileWebWikiGrokError event_taskToken +MobileWebWikiGrokError event_taskType +MobileWebWikiGrokError event_testing +MobileWebWikiGrokError event_userToken +MobileWebWikiGrokError isTruncated +MobileWebWikiGrokError webHost +MobileWebWikiGrokError wiki +MobileWebWikiGrok event_action +MobileWebWikiGrok event_isLoggedIn +MobileWebWikiGrok event_mobileMode +MobileWebWikiGrok event_scrollOffset +MobileWebWikiGrok event_taskToken +MobileWebWikiGrok event_taskType +MobileWebWikiGrok event_testing +MobileWebWikiGrok event_userEditCount +MobileWebWikiGrok event_username +MobileWebWikiGrok event_userToken +MobileWebWikiGrok event_version +MobileWebWikiGrok event_widgetOffset +MobileWebWikiGrok isTruncated +MobileWebWikiGrokResponse clientValidated +MobileWebWikiGrokResponse event_isLoggedIn +MobileWebWikiGrokResponse event_mobileMode +MobileWebWikiGrokResponse event_property +MobileWebWikiGrokResponse event_propertyId +MobileWebWikiGrokResponse event_response +MobileWebWikiGrokResponse event_sourceWiki +MobileWebWikiGrokResponse event_subject +MobileWebWikiGrokResponse event_subjectId +MobileWebWikiGrokResponse event_taskToken +MobileWebWikiGrokResponse event_taskType +MobileWebWikiGrokResponse event_testing +MobileWebWikiGrokResponse event_userEditCount +MobileWebWikiGrokResponse event_userToken +MobileWebWikiGrokResponse event_value +MobileWebWikiGrokResponse event_valueId +MobileWebWikiGrokResponse isTruncated +MobileWebWikiGrokResponse webHost +MobileWebWikiGrokResponse wiki +MobileWebWikiGrok webHost +MobileWebWikiGrok wiki +MobileWikiAppAppearanceSettings clientValidated +MobileWikiAppAppearanceSettings event_action +MobileWikiAppAppearanceSettings event_appearanceAppInstallID +MobileWikiAppAppearanceSettings event_appInstallID +MobileWikiAppAppearanceSettings event_currentValue +MobileWikiAppAppearanceSettings event_newValue +MobileWikiAppAppearanceSettings isTruncated 
+MobileWikiAppAppearanceSettings webHost +MobileWikiAppAppearanceSettings wiki +MobileWikiAppAppearanceSettings userAgent +MobileWikiAppArticleSuggestions clientValidated +MobileWikiAppArticleSuggestions event_action +MobileWikiAppArticleSuggestions event_appInstallID +MobileWikiAppArticleSuggestions event_readMoreIndex +MobileWikiAppArticleSuggestions event_readMoreSource +MobileWikiAppArticleSuggestions event_version +MobileWikiAppArticleSuggestions event_latency +MobileWikiAppArticleSuggestions isTruncated +MobileWikiAppArticleSuggestions webHost +MobileWikiAppArticleSuggestions wiki +MobileWikiAppArticleSuggestions userAgent +MobileWikiAppCreateAccount clientValidated +MobileWikiAppCreateAccount event_action +MobileWikiAppCreateAccount event_createAccountSessionToken +MobileWikiAppCreateAccount event_errorText +MobileWikiAppCreateAccount event_loginSessionToken +MobileWikiAppCreateAccount event_source +MobileWikiAppCreateAccount isTruncated +MobileWikiAppCreateAccount webHost +MobileWikiAppCreateAccount wiki +MobileWikiAppCreateAccount userAgent +MobileWikiAppDailyStats webHost +MobileWikiAppDailyStats wiki +MobileWikiAppDailyStats event_appInstallAgeDays +MobileWikiAppDailyStats event_appInstallID +MobileWikiAppDailyStats userAgent +MobileWikiAppEdit clientValidated +MobileWikiAppEdit event_abuseFilterName +MobileWikiAppEdit event_action +MobileWikiAppEdit event_editSessionToken +MobileWikiAppEdit event_editSummaryTapped +MobileWikiAppEdit event_errorText +MobileWikiAppEdit event_pageNS +MobileWikiAppEdit event_revID +MobileWikiAppEdit event_userID +MobileWikiAppEdit event_userName +MobileWikiAppEdit event_wikidataDescriptionEdit +MobileWikiAppEdit isTruncated +MobileWikiAppEdit webHost +MobileWikiAppEdit wiki +MobileWikiAppEdit userAgent +MobileWikiAppInstallReferrer webHost +MobileWikiAppInstallReferrer wiki +MobileWikiAppInstallReferrer userAgent +MobileWikiAppInstallReferrer event_campaign_id +MobileWikiAppInstallReferrer event_referrer_url 
+MobileWikiAppLangSelect webHost +MobileWikiAppLangSelect wiki +MobileWikiAppLangSelect event_action +MobileWikiAppLangSelect event_appInstallID +MobileWikiAppLangSelect event_newLang +MobileWikiAppLangSelect event_oldLang +MobileWikiAppLangSelect event_sessionToken +MobileWikiAppLangSelect event_source +MobileWikiAppLangSelect event_timeSpent +MobileWikiAppLangSelect userAgent +MobileWikiAppLinkPreview event_action +MobileWikiAppLinkPreview event_appInstallID +MobileWikiAppLinkPreview event_previewSessionToken +MobileWikiAppLinkPreview event_timeSpent +MobileWikiAppLinkPreview event_version +MobileWikiAppLinkPreview webHost +MobileWikiAppLinkPreview wiki +MobileWikiAppLinkPreview userAgent +MobileWikiAppLogin clientValidated +MobileWikiAppLogin event_action +MobileWikiAppLogin event_editSessionToken +MobileWikiAppLogin event_errorText +MobileWikiAppLogin event_loginSessionToken +MobileWikiAppLogin event_source +MobileWikiAppLogin isTruncated +MobileWikiAppLogin webHost +MobileWikiAppLogin wiki +MobileWikiAppLogin userAgent +MobileWikiAppMediaGallery clientValidated +MobileWikiAppMediaGallery event_action +MobileWikiAppMediaGallery event_appInstallID +MobileWikiAppMediaGallery event_gallerySessionToken +MobileWikiAppMediaGallery event_source +MobileWikiAppMediaGallery event_timeSpent +MobileWikiAppMediaGallery isTruncated +MobileWikiAppMediaGallery webHost +MobileWikiAppMediaGallery wiki +MobileWikiAppMediaGallery userAgent +MobileWikiAppNavMenu webHost +MobileWikiAppNavMenu wiki +MobileWikiAppNavMenu userAgent +MobileWikiAppNavMenu event_action +MobileWikiAppNavMenu event_appInstallID +MobileWikiAppNavMenu event_menuItem +MobileWikiAppNavMenu event_sessionToken +MobileWikiAppOnboarding clientValidated +MobileWikiAppOnboarding event_action +MobileWikiAppOnboarding event_onboardingToken +MobileWikiAppOnboarding isTruncated +MobileWikiAppOnboarding webHost +MobileWikiAppOnboarding wiki +MobileWikiAppOnboarding userAgent +MobileWikiAppProtectedEditAttempt 
clientValidated +MobileWikiAppProtectedEditAttempt event_protectionStatus +MobileWikiAppProtectedEditAttempt isTruncated +MobileWikiAppProtectedEditAttempt webHost +MobileWikiAppProtectedEditAttempt wiki +MobileWikiAppProtectedEditAttempt userAgent +MobileWikiAppSavedPages clientValidated +MobileWikiAppSavedPages event_action +MobileWikiAppSavedPages event_appInstallID +MobileWikiAppSavedPages event_savedPagesAppInstallToken +MobileWikiAppSavedPages isTruncated +MobileWikiAppSavedPages webHost +MobileWikiAppSavedPages wiki +MobileWikiAppSavedPages userAgent +MobileWikiAppSearch clientValidated +MobileWikiAppSearch event_action +MobileWikiAppSearch event_appInstallID +MobileWikiAppSearch event_numberOfResults +MobileWikiAppSearch event_searchSessionToken +MobileWikiAppSearch event_timeToDisplayResults +MobileWikiAppSearch event_typeOfSearch +MobileWikiAppSearch event_position +MobileWikiAppSearch event_source +MobileWikiAppSearch isTruncated +MobileWikiAppSearch webHost +MobileWikiAppSearch wiki +MobileWikiAppSearch userAgent +MobileWikiAppSessions clientValidated +MobileWikiAppSessions event_action +MobileWikiAppSessions event_appInstallID +MobileWikiAppSessions event_backPressed +MobileWikiAppSessions event_pagesViewedFromExternal +MobileWikiAppSessions event_pagesViewedFromHistory +MobileWikiAppSessions event_pagesViewedFromLanglink +MobileWikiAppSessions event_pagesViewedFromRandom +MobileWikiAppSessions event_pagesViewedFromSaved +MobileWikiAppSessions event_pagesViewedFromSearch +MobileWikiAppSessions event_totalPagesViewed +MobileWikiAppSessions event_apiMode +MobileWikiAppSessions event_fromBack +MobileWikiAppSessions event_fromDisambig +MobileWikiAppSessions event_fromExternal +MobileWikiAppSessions event_fromHistory +MobileWikiAppSessions event_fromInternal +MobileWikiAppSessions event_fromLanglink +MobileWikiAppSessions event_fromNearby +MobileWikiAppSessions event_fromRandom +MobileWikiAppSessions event_fromReadingList +MobileWikiAppSessions 
event_fromSearch +MobileWikiAppSessions event_leadLatency +MobileWikiAppSessions event_length +MobileWikiAppSessions event_noDescription +MobileWikiAppSessions event_restLatency +MobileWikiAppSessions event_totalPages +MobileWikiAppSessions isTruncated +MobileWikiAppSessions webHost +MobileWikiAppSessions wiki +MobileWikiAppSessions userAgent +MobileWikiAppShareAFact clientValidated +MobileWikiAppShareAFact event_action +MobileWikiAppShareAFact event_sharemode +MobileWikiAppShareAFact event_shareSessionToken +MobileWikiAppShareAFact event_tutorialFeatureEnabled +MobileWikiAppShareAFact event_tutorialShown +MobileWikiAppShareAFact event_pageID +MobileWikiAppShareAFact event_revID +MobileWikiAppShareAFact event_article +MobileWikiAppShareAFact event_text +MobileWikiAppShareAFact isTruncated +MobileWikiAppShareAFact webHost +MobileWikiAppShareAFact wiki +MobileWikiAppShareAFact userAgent +MobileWikiAppStuffHappens clientValidated +MobileWikiAppStuffHappens event_applicationContext +MobileWikiAppStuffHappens event_failedEndpoint +MobileWikiAppStuffHappens isTruncated +MobileWikiAppStuffHappens webHost +MobileWikiAppStuffHappens wiki +MobileWikiAppStuffHappens userAgent +MobileWikiAppToCInteraction clientValidated +MobileWikiAppToCInteraction event_action +MobileWikiAppToCInteraction event_appInstallID +MobileWikiAppToCInteraction event_numSections +MobileWikiAppToCInteraction event_sectionIndex +MobileWikiAppToCInteraction event_timeSpent +MobileWikiAppToCInteraction event_tocInteractionToken +MobileWikiAppToCInteraction isTruncated +MobileWikiAppToCInteraction webHost +MobileWikiAppToCInteraction wiki +MobileWikiAppToCInteraction userAgent +MobileWikiAppWidgets clientValidated +MobileWikiAppWidgets event_action +MobileWikiAppWidgets event_appInstallID +MobileWikiAppWidgets isTruncated +MobileWikiAppWidgets webHost +MobileWikiAppWidgets wiki +MobileWikiAppWidgets userAgent +NavigationTiming clientValidated +NavigationTiming event_action +NavigationTiming 
event_connectEnd +NavigationTiming event_connecting +NavigationTiming event_connectStart +NavigationTiming event_dnsLookup +NavigationTiming event_domComplete +NavigationTiming event_domInteractive +NavigationTiming event_domLoading +NavigationTiming event_fetchStart +NavigationTiming event_firstPaint +NavigationTiming event_firstPaintAfterLoad +NavigationTiming event_isAnon +NavigationTiming event_isHttps +NavigationTiming event_loadEventEnd +NavigationTiming event_loadEventStart +NavigationTiming event_loading +NavigationTiming event_mediaWikiLoadComplete +NavigationTiming event_mobileMode +NavigationTiming event_namespaceId +NavigationTiming event_originCountry +NavigationTiming event_originRegion +NavigationTiming event_receiving +NavigationTiming event_redirectCount +NavigationTiming event_redirecting +NavigationTiming event_rendering +NavigationTiming event_requestStart +NavigationTiming event_responseEnd +NavigationTiming event_responseStart +NavigationTiming event_runtime +NavigationTiming event_secureConnectionStart +NavigationTiming event_sending +NavigationTiming event_waiting +NavigationTiming event_domContentLoadedEventEnd +NavigationTiming event_domContentLoadedEventStart +NavigationTiming event_domainLookupEnd +NavigationTiming event_domainLookupStart +NavigationTiming event_firstImage +NavigationTiming event_isHiDPI +NavigationTiming event_isHttp2 +NavigationTiming event_lazyLoadImages +NavigationTiming event_mediaWikiLoadEnd +NavigationTiming event_mediaWikiLoadStart +NavigationTiming event_mediaWikiVersion +NavigationTiming event_netSpeed +NavigationTiming event_redirectEnd +NavigationTiming event_redirectStart +NavigationTiming event_unloadEventEnd +NavigationTiming event_unloadEventStart +NavigationTiming isTruncated +NavigationTiming webHost +NavigationTiming wiki +PageContentSaveComplete clientValidated +PageContentSaveComplete event_isAPI +PageContentSaveComplete event_isMobile +PageContentSaveComplete event_revisionId 
+PageContentSaveComplete isTruncated +PageContentSaveComplete webHost +PageContentSaveComplete wiki +PageCreation clientValidated +PageCreation event_namespace +PageCreation event_pageId +PageCreation event_revId +PageCreation event_title +PageCreation event_userId +PageCreation event_userText +PageCreation isTruncated +PageCreation webHost +PageCreation wiki +PageDeletion clientValidated +PageDeletion event_comment +PageDeletion event_namespace +PageDeletion event_pageId +PageDeletion event_title +PageDeletion event_userId +PageDeletion event_userText +PageDeletion isTruncated +PageDeletion webHost +PageDeletion wiki +PageMove clientValidated +PageMove event_comment +PageMove event_newNamespace +PageMove event_newTitle +PageMove event_oldNamespace +PageMove event_oldTitle +PageMove event_pageId +PageMove event_redirectId +PageMove event_userId +PageMove event_userText +PageMove isTruncated +PageMove webHost +PageMove wiki +PageRestoration clientValidated +PageRestoration event_comment +PageRestoration event_namespace +PageRestoration event_newPageId +PageRestoration event_oldPageId +PageRestoration event_title +PageRestoration event_userId +PageRestoration event_userText +PageRestoration isTruncated +PageRestoration webHost +PageRestoration wiki +Popups event_sessionToken +Popups event_version +Popups event_pageIdSource +Popups event_namespaceIdSource +Popups event_namespaceIdHover +Popups event_isAnon +Popups event_totalInteractionTime +Popups event_previewType +Popups event_hovercardsSuppressedByGadget +Popups event_perceivedWait +Popups event_editCountBucket +Popups event_previewCountBucket +Popups event_linkInteractionToken +Popups event_pageToken +Popups clientValidated +Popups event_action +Popups event_duration +Popups event_pageTitleHover +Popups event_pageTitleSource +Popups event_popupDelay +Popups event_popupEnabled +Popups event_sessionID +Popups event_checkin +Popups event_api +Popups isTruncated +Popups webHost +Popups wiki +PrefUpdate 
clientValidated +PrefUpdate event_isDefault +PrefUpdate event_property +PrefUpdate event_saveTimestamp +PrefUpdate event_userId +PrefUpdate event_value +PrefUpdate event_version +PrefUpdate isTruncated +PrefUpdate webHost +PrefUpdate wiki +RelatedArticles webHost +RelatedArticles wiki +RelatedArticles event_clickIndex +RelatedArticles event_eventName +RelatedArticles event_pageId +RelatedArticles event_skin +RelatedArticles event_userSessionToken +SaveTiming clientValidated +SaveTiming event_duration +SaveTiming event_runtime +SaveTiming event_saveTiming +SaveTiming event_mediaWikiVersion +SaveTiming isTruncated +SaveTiming webHost +SaveTiming wiki +ServerSideAccountCreation clientValidated +ServerSideAccountCreation event_campaign +ServerSideAccountCreation event_displayMobile +ServerSideAccountCreation event_isSelfMade +ServerSideAccountCreation event_isStable +ServerSideAccountCreation event_returnTo +ServerSideAccountCreation event_returnToQuery +ServerSideAccountCreation event_token +ServerSideAccountCreation event_userBuckets +ServerSideAccountCreation event_userId +ServerSideAccountCreation event_userName +ServerSideAccountCreation isTruncated +ServerSideAccountCreation webHost +ServerSideAccountCreation wiki +TaskRecommendationClick clientValidated +TaskRecommendationClick event_pageId +TaskRecommendationClick event_setId +TaskRecommendationClick isTruncated +TaskRecommendationClick webHost +TaskRecommendationClick wiki +TaskRecommendation clientValidated +TaskRecommendation event_offset +TaskRecommendation event_pageId +TaskRecommendation event_setId +TaskRecommendationImpression clientValidated +TaskRecommendationImpression event_interface +TaskRecommendationImpression event_offset +TaskRecommendationImpression event_pageId +TaskRecommendationImpression event_setId +TaskRecommendationImpression event_userId +TaskRecommendationImpression isTruncated +TaskRecommendationImpression webHost +TaskRecommendationImpression wiki +TaskRecommendation isTruncated 
+TaskRecommendationLightbulbClick clientValidated +TaskRecommendationLightbulbClick event_pageId +TaskRecommendationLightbulbClick event_userId +TaskRecommendationLightbulbClick isTruncated +TaskRecommendationLightbulbClick webHost +TaskRecommendationLightbulbClick wiki +TaskRecommendation webHost +TaskRecommendation wiki +TestSearchSatisfaction webHost +TestSearchSatisfaction wiki +TestSearchSatisfaction event_action +TestSearchSatisfaction event_depth +TestSearchSatisfaction event_logId +TestSearchSatisfaction event_pageId +TestSearchSatisfaction event_searchSessionId +VET135171 webHost +VET135171 wiki +VET135171 event_msg +WikimediaBlogVisit clientValidated +WikimediaBlogVisit event_referrerUrl +WikimediaBlogVisit event_requestUrl +WikimediaBlogVisit isTruncated +WikimediaBlogVisit webHost +WikimediaBlogVisit wiki +WikipediaZeroUsage webHost +WikipediaZeroUsage wiki +WikipediaZeroUsage event_action +WikipediaZeroUsage event_net +WikipediaZeroUsage event_time +WikipediaZeroUsage event_xcs +MediaViewer webHost +MediaViewer wiki +MediaViewer event_action +MediaViewer event_samplingFactor +MediaViewer event_variant +MultimediaViewerVersusPageFilePerformance webHost +MultimediaViewerVersusPageFilePerformance wiki +MultimediaViewerVersusPageFilePerformance event_type +MultimediaViewerVersusPageFilePerformance event_duration +MultimediaViewerVersusPageFilePerformance event_cache +MultimediaViewerVersusPageFilePerformance event_windowSize +MultimediaViewerAttribution webHost +MultimediaViewerAttribution wiki +MultimediaViewerAttribution event_authorPresent +MultimediaViewerAttribution event_sourcePresent +MultimediaViewerAttribution event_licensePresent +MultimediaViewerAttribution event_loggedIn +MultimediaViewerAttribution event_samplingFactor +UploadWizardStep webHost +UploadWizardStep wiki +UploadWizardStep event_flowId +UploadWizardStep event_flowPosition +UploadWizardStep event_step +UploadWizardStep event_skipped +UploadWizardFlowEvent webHost 
+UploadWizardFlowEvent wiki +UploadWizardFlowEvent event_flowId +UploadWizardFlowEvent event_flowPosition +UploadWizardFlowEvent event_event +UploadWizardErrorFlowEvent webHost +UploadWizardErrorFlowEvent wiki +UploadWizardErrorFlowEvent event_flowId +UploadWizardErrorFlowEvent event_flowPosition +UploadWizardErrorFlowEvent event_step +UploadWizardErrorFlowEvent event_code +UploadWizardErrorFlowEvent event_message +UploadWizardExceptionFlowEvent webHost +UploadWizardExceptionFlowEvent wiki +UploadWizardExceptionFlowEvent event_flowId +UploadWizardExceptionFlowEvent event_flowPosition +UploadWizardExceptionFlowEvent event_message +UploadWizardExceptionFlowEvent event_url +UploadWizardExceptionFlowEvent event_line +UploadWizardExceptionFlowEvent event_column +UploadWizardExceptionFlowEvent event_stack +UploadWizardUploadFlowEvent webHost +UploadWizardUploadFlowEvent wiki +UploadWizardUploadFlowEvent event_flowId +UploadWizardUploadFlowEvent event_flowPosition +UploadWizardUploadFlowEvent event_event +UploadWizardUploadFlowEvent event_extension +UploadWizardUploadFlowEvent event_quantity +UploadWizardUploadFlowEvent event_size +UploadWizardUploadFlowEvent event_duration +UploadWizardUploadFlowEvent event_error +MultimediaViewerDuration webHost +MultimediaViewerDuration wiki +MultimediaViewerDuration event_type +MultimediaViewerDuration event_duration +MultimediaViewerDuration event_loggedIn +MultimediaViewerDuration event_country +MultimediaViewerDuration event_samplingFactor +MultimediaViewerDuration event_uploadTimestamp +MultimediaViewerNetworkPerformance webHost +MultimediaViewerNetworkPerformance wiki +MultimediaViewerNetworkPerformance event_type +MultimediaViewerNetworkPerformance event_contentHost +MultimediaViewerNetworkPerformance event_urlHost +MultimediaViewerNetworkPerformance event_status +MultimediaViewerNetworkPerformance event_contentLength +MultimediaViewerNetworkPerformance event_country +MultimediaViewerNetworkPerformance event_isHttps 
+MultimediaViewerNetworkPerformance event_timestamp +MultimediaViewerNetworkPerformance event_total +MultimediaViewerNetworkPerformance event_redirect +MultimediaViewerNetworkPerformance event_dns +MultimediaViewerNetworkPerformance event_tcp +MultimediaViewerNetworkPerformance event_request +MultimediaViewerNetworkPerformance event_response +MultimediaViewerNetworkPerformance event_cache +MultimediaViewerNetworkPerformance event_age +MultimediaViewerNetworkPerformance event_XCache +MultimediaViewerNetworkPerformance event_XVarnish +MultimediaViewerNetworkPerformance event_varnish1 +MultimediaViewerNetworkPerformance event_varnish2 +MultimediaViewerNetworkPerformance event_varnish3 +MultimediaViewerNetworkPerformance event_varnish4 +MultimediaViewerNetworkPerformance event_varnish1hits +MultimediaViewerNetworkPerformance event_varnish2hits +MultimediaViewerNetworkPerformance event_varnish3hits +MultimediaViewerNetworkPerformance event_varnish4hits +MultimediaViewerNetworkPerformance event_bandwidth +MultimediaViewerNetworkPerformance event_metered +MultimediaViewerNetworkPerformance event_uploadTimestamp +MultimediaViewerNetworkPerformance event_lastModified +MultimediaViewerNetworkPerformance event_imageWidth +UploadWizardTutorialActions webHost +UploadWizardTutorialActions wiki +UploadWizardTutorialActions event_username +UploadWizardTutorialActions event_language +UploadWizardTutorialActions event_action +UploadWizardUploadActions webHost +UploadWizardUploadActions wiki +UploadWizardUploadActions event_username +UploadWizardUploadActions event_language +UploadWizardUploadActions event_filetype +UploadWizardUploadActions event_error +UploadWizardUploadActions event_numuploads +UploadWizardUploadActions event_action +MobileWikiAppFeed userAgent +MobileWikiAppFeed webHost +MobileWikiAppFeed wiki +MobileWikiAppFeed event_action +MobileWikiAppFeed event_age +MobileWikiAppFeed event_appInstallID +MobileWikiAppFeed event_cardType +MobileWikiAppFeed event_position 
+MobileWikiAppFeed event_sessionToken +MobileWikiAppFeed event_timeSpent +MobileWikiAppIntents userAgent +MobileWikiAppIntents webHost +MobileWikiAppIntents wiki +MobileWikiAppIntents event_action +MobileWikiAppIntents event_appInstallID +MobileWikiAppReadingLists userAgent +MobileWikiAppReadingLists webHost +MobileWikiAppReadingLists wiki +MobileWikiAppReadingLists event_action +MobileWikiAppReadingLists event_addsource +MobileWikiAppReadingLists event_appInstallID +MobileWikiAppReadingLists event_itemcount +MobileWikiAppReadingLists event_listcount +MobileWikiAppTabs userAgent +MobileWikiAppTabs webHost +MobileWikiAppTabs wiki +MobileWikiAppTabs event_action +MobileWikiAppTabs event_appInstallID +MobileWikiAppTabs event_tabCount +MobileWikiAppTabs event_tabIndex +MobileWikiAppTabs event_tabsSessionToken +MobileWikiAppFindInPage userAgent +MobileWikiAppFindInPage webHost +MobileWikiAppFindInPage wiki +MobileWikiAppFindInPage event_appInstallID +MobileWikiAppFindInPage event_numFindNext +MobileWikiAppFindInPage event_numFindPrev +MobileWikiAppFindInPage event_pageHeight +MobileWikiAppFindInPage event_timeSpent +MobileWikiAppPageScroll userAgent +MobileWikiAppPageScroll webHost +MobileWikiAppPageScroll wiki +MobileWikiAppPageScroll event_maxPercentViewed +MobileWikiAppPageScroll event_pageHeight +MobileWikiAppPageScroll event_pageID +MobileWikiAppPageScroll event_scrollFluxDown +MobileWikiAppPageScroll event_scrollFluxUp +MobileWikiAppPageScroll event_timeSpent +MobileWikiAppWiktionaryPopup userAgent +MobileWikiAppWiktionaryPopup webHost +MobileWikiAppWiktionaryPopup wiki +MobileWikiAppWiktionaryPopup event_text +MobileWikiAppWiktionaryPopup event_timeSpent +ReadingDepth webHost +ReadingDepth wiki +ReadingDepth event_action +ReadingDepth event_domInteractiveTime +ReadingDepth event_firstPaintTime +ReadingDepth event_isAnon +ReadingDepth event_namespaceId +ReadingDepth event_pageTitle +ReadingDepth event_pageToken +ReadingDepth event_sessionToken +ReadingDepth 
event_skin +ReadingDepth event_totalLength +ReadingDepth event_visibleLength diff --git a/modules/role/manifests/mariadb.pp b/modules/role/manifests/mariadb.pp index fcec982..ca83e8c 100644 --- a/modules/role/manifests/mariadb.pp +++ b/modules/role/manifests/mariadb.pp @@ -225,6 +225,17 @@ require => Group['eventlog'], } + require_package('python3-pymysql') + + file { '/usr/local/bin/eventlogging_cleaner': + ensure => present, + owner => 'root', + group => 'root', + mode => '0500', + source => 'puppet:///modules/role/mariadb/eventlogging_cleaner.py', + require => Package['python3-pymysql'], + } + file { '/usr/local/bin/eventlogging_sync.sh': ensure => present, owner => 'root', -- To view, visit https://gerrit.wikimedia.org/r/356383 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I33e312cd39a9860c895897fcb90ed23820ca4dff Gerrit-PatchSet: 29 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Elukey <[email protected]> Gerrit-Reviewer: Elukey <[email protected]> Gerrit-Reviewer: Giuseppe Lavagetto <[email protected]> Gerrit-Reviewer: Jcrespo <[email protected]> Gerrit-Reviewer: Marostegui <[email protected]> Gerrit-Reviewer: Mforns <[email protected]> Gerrit-Reviewer: Ottomata <[email protected]> Gerrit-Reviewer: Volans <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
