Marostegui has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/332228 )
Change subject: mariadb: Split dbstore role classes ...................................................................... mariadb: Split dbstore role classes * Split the dbstore class out from mariadb.pp into two different files * dbstore.pp will be used for the original dbstore hosts that still run TokuDB as an engine. * dbstore2.pp will be used for the new dbstore servers that no longer use TokuDB as an engine but use InnoDB. * dbstore2.my.cnf.erb: Enabled gtid_domain_id flag. Currently this only applies to dbstore2001 and dbstore2002 which already have it enabled manually Bug: T130128 Bug: T150850 Bug: T149418 Change-Id: I2e87640257fc58ec16a02c825927129cfd4520eb --- M manifests/site.pp M modules/role/manifests/mariadb.pp A modules/role/manifests/mariadb/dbstore.pp A modules/role/manifests/mariadb/dbstore2.pp A templates/mariadb/dbstore2.my.cnf.erb 5 files changed, 267 insertions(+), 50 deletions(-) Approvals: Marostegui: Looks good to me, approved jenkins-bot: Verified diff --git a/manifests/site.pp b/manifests/site.pp index 8b1f5a0..ec256bb 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -828,7 +828,7 @@ node 'dbstore2001.codfw.wmnet' { # 24h delay on all repl streams - class { '::role::mariadb::dbstore': + class { 'role::mariadb::dbstore2': lag_warn => 90000, lag_crit => 180000, # Delayed slaves legitimately and cleanly (errno = 0) stop the SQL thread, so diff --git a/modules/role/manifests/mariadb.pp b/modules/role/manifests/mariadb.pp index 1374d56..3d37c0f 100644 --- a/modules/role/manifests/mariadb.pp +++ b/modules/role/manifests/mariadb.pp @@ -479,55 +479,6 @@ } } -# MariaDB 10 slaves replicating all shards -class role::mariadb::dbstore( - $lag_warn = 300, - $lag_crit = 600, - $warn_stopped = true, - ) { - - system::role { 'role::mariadb::dbstore': - description => 'Delayed Slave', - } - - include mariadb::packages_wmf - include mariadb::service - - include standard - include passwords::misc::scripts - - class { 
'role::mariadb::grants::production': - password => $passwords::misc::scripts::mysql_root_pass, - prompt => 'DBSTORE', - } - - include role::mariadb::monitor::dba - include passwords::misc::scripts - include role::mariadb::ferm - - class {'role::mariadb::groups': - mysql_group => 'dbstore', - mysql_role => 'slave', - } - - class { 'mariadb::config': - config => 'mariadb/dbstore.my.cnf.erb', - datadir => '/srv/sqldata', - tmpdir => '/srv/tmp', - ssl => 'puppet-cert', - p_s => 'off', - } - - mariadb::monitor_replication { - ['s1','s2','s3','s4','s5','s6','s7','m2','m3','x1']: - is_critical => false, - contact_group => 'admins', # only show on nagios/irc - lag_warn => $lag_warn, - lag_crit => $lag_crit, - warn_stopped => $warn_stopped, - } -} - # MariaDB 10 Analytics all-shards slave, with scratch space and TokuDB # analytics slaves are already either dbstores or eventlogging slaves # so they just need the extra core monitoring diff --git a/modules/role/manifests/mariadb/dbstore.pp b/modules/role/manifests/mariadb/dbstore.pp new file mode 100644 index 0000000..5ef0594 --- /dev/null +++ b/modules/role/manifests/mariadb/dbstore.pp @@ -0,0 +1,48 @@ +# MariaDB 10 slaves replicating all shards and running TokuDB +class role::mariadb::dbstore( + $lag_warn = 300, + $lag_crit = 600, + $warn_stopped = true, + ) { + + system::role { 'role::mariadb::dbstore': + description => 'Delayed Slave', + } + + include mariadb::packages_wmf + include mariadb::service + + include standard + include passwords::misc::scripts + + class { 'role::mariadb::grants::production': + password => $passwords::misc::scripts::mysql_root_pass, + prompt => 'DBSTORE', + } + + include role::mariadb::monitor::dba + include passwords::misc::scripts + include role::mariadb::ferm + + class {'role::mariadb::groups': + mysql_group => 'dbstore', + mysql_role => 'slave', + } + + class { 'mariadb::config': + config => 'mariadb/dbstore.my.cnf.erb', + datadir => '/srv/sqldata', + tmpdir => '/srv/tmp', + ssl => 
'puppet-cert', + p_s => 'off', + } + + mariadb::monitor_replication { + ['s1','s2','s3','s4','s5','s6','s7','m2','m3','x1']: + is_critical => false, + contact_group => 'admins', # only show on nagios/irc + lag_warn => $lag_warn, + lag_crit => $lag_crit, + warn_stopped => $warn_stopped, + } +} diff --git a/modules/role/manifests/mariadb/dbstore2.pp b/modules/role/manifests/mariadb/dbstore2.pp new file mode 100644 index 0000000..c6bdbda --- /dev/null +++ b/modules/role/manifests/mariadb/dbstore2.pp @@ -0,0 +1,48 @@ +# MariaDB 10 slaves replicating all shards and running InnoDB +class role::mariadb::dbstore2( + $lag_warn = 300, + $lag_crit = 600, + $warn_stopped = true, + ) { + + system::role { 'role::mariadb::dbstore2': + description => 'Delayed Slave', + } + + include mariadb::packages_wmf + include mariadb::service + + include standard + include passwords::misc::scripts + + class { 'role::mariadb::grants::production': + password => $passwords::misc::scripts::mysql_root_pass, + prompt => 'DBSTORE', + } + + include role::mariadb::monitor::dba + include passwords::misc::scripts + include role::mariadb::ferm + + class {'role::mariadb::groups': + mysql_group => 'dbstore', + mysql_role => 'slave', + } + + class { 'mariadb::config': + config => 'mariadb/dbstore2.my.cnf.erb', + datadir => '/srv/sqldata', + tmpdir => '/srv/tmp', + ssl => 'puppet-cert', + p_s => 'off', + } + + mariadb::monitor_replication { + ['s1','s2','s3','s4','s5','s6','s7','m2','m3','x1']: + is_critical => false, + contact_group => 'admins', # only show on nagios/irc + lag_warn => $lag_warn, + lag_crit => $lag_crit, + warn_stopped => $warn_stopped, + } +} diff --git a/templates/mariadb/dbstore2.my.cnf.erb b/templates/mariadb/dbstore2.my.cnf.erb new file mode 100644 index 0000000..c95b4b8 --- /dev/null +++ b/templates/mariadb/dbstore2.my.cnf.erb @@ -0,0 +1,170 @@ +# dbstore delayed and analytic slaves + +# Please use separate .cnf templates for each type of server. 
+ +[client] +port = 3306 +socket = /tmp/mysql.sock +<% if @ssl == 'on' %> +# ssl +ssl-ca=/etc/mysql/ssl/cacert.pem +ssl-cert=/etc/mysql/ssl/server-cert.pem +ssl-key=/etc/mysql/ssl/server-key.pem +# skip server cert validation until we generate one cert per server +# it would check the cert's common name against the host +# ssl-verify-server-cert +<% elsif @ssl == 'puppet-cert' %> +# ssl +ssl-ca=/etc/ssl/certs/Puppet_Internal_CA.pem +ssl-cert=/etc/mysql/ssl/cert.pem +ssl-key=/etc/mysql/ssl/server.key +ssl-verify-server-cert +<% end %> + +[mysqld] + +skip-external-locking +skip-name-resolve +skip-slave-start +temp-pool + +user = mysql +socket = /tmp/mysql.sock +port = 3306 +datadir = <%= @datadir %> +tmpdir = <%= @tmpdir %> +server_id = <%= @server_id %> +# gtid_domain_id flag is needed for multisource replication and GTID. +# Strictly it is only needed on masters or servers that can potentially be +# masters but for consistency it should be set in all of them. +# https://mariadb.com/kb/en/mariadb/gtid/ +gtid_domain_id = <%= @gtid_domain_id %> + + +read_only = 0 + +# enable socket authentication +plugin-load = unix_socket=auth_socket.so + +secure_file_priv = /dev/null +max_connections = 250 +max_allowed_packet = 32M +connect_timeout = 3 +query_cache_size = 0 +query_cache_type = 0 +event_scheduler = 1 + +table_open_cache = 50000 +table_definition_cache = 50000 +default-storage-engine = InnoDB + +# InnoDB file-per-table + TokuDB love those file descriptors +open-files-limit = 400000 + +character_set_server = binary +character_set_filesystem = binary +collation_server = binary + +innodb_file_per_table = 1 +innodb_buffer_pool_size = 100G +innodb_log_file_size = 4G +innodb_flush_log_at_trx_commit = 0 +innodb_flush_method = O_DIRECT +innodb_thread_concurrency = 0 +innodb_io_capacity = 1000 +innodb_stats_sample_pages = 16 +innodb_stats_method = nulls_unequal +innodb_locks_unsafe_for_binlog = 1 +aria_pagecache_buffer_size = 16G + +# dump and load innodb buffer at start and 
stop +innodb_buffer_pool_load_at_startup = 1 +innodb_buffer_pool_dump_at_shutdown = 1 + +optimizer_switch='engine_condition_pushdown=on,optimize_join_buffer_size=on' +join_cache_level = 8 + +#plugin-load = ha_tokudb +#tokudb_cache_size = 24G +# Tokudb will stop working when there is less than 1% of free disk space +# (5% by default) +#tokudb_fs_reserve_percent = 1 + +skip-slave-start +slave_transaction_retries = 4294967295 + +s1.replicate-wild-do-table = %wik%.% +s1.replicate-wild-do-table = heartbeat.% +s2.replicate-wild-do-table = %wik%.% +s2.replicate-wild-do-table = heartbeat.% +s3.replicate-wild-do-table = %wik%.% +s3.replicate-wild-do-table = heartbeat.% +s4.replicate-wild-do-table = %wik%.% +s4.replicate-wild-do-table = heartbeat.% +s5.replicate-wild-do-table = %wik%.% +s5.replicate-wild-do-table = heartbeat.% +s6.replicate-wild-do-table = %wik%.% +s6.replicate-wild-do-table = heartbeat.% +s7.replicate-wild-do-table = %wik%.% +s7.replicate-wild-do-table = centralauth.% +s7.replicate-wild-do-table = heartbeat.% +m3.replicate-wild-do-table = phab%.% +m3.replicate-wild-do-table = phlegal%.% +m3.replicate-wild-do-table = heartbeat.% +m4.replicate-wild-do-table = log.% +m4.replicate-wild-do-table = heartbeat.% +x1.replicate-wild-do-table = flowdb.% +x1.replicate-wild-do-table = wikishared.% +x1.replicate-wild-do-table = heartbeat.% + +<% if @kernelversion < "3.19" %> +# Until kernel 3.16 http://www.spinics.net/lists/stable/msg61873.html +# At least, I think so. 
For now we need to avoid an assertion failure on Trusty w/ 3.13 +innodb_use_native_aio = 0 +innodb_read_io_threads = 16 +innodb_write_io_threads = 8 +<% end %> +<% if @ssl == 'on' %> +# ssl +ssl-ca=/etc/mysql/ssl/cacert.pem +ssl-cert=/etc/mysql/ssl/server-cert.pem +ssl-key=/etc/mysql/ssl/server-key.pem +ssl-cipher=TLSv1.2 +<% elsif @ssl == 'puppet-cert' %> +# ssl +ssl-ca=/etc/ssl/certs/Puppet_Internal_CA.pem +ssl-cert=/etc/mysql/ssl/cert.pem +ssl-key=/etc/mysql/ssl/server.key +ssl-cipher=TLSv1.2 +<% end %> +<% if @p_s == 'on' %> +# Enabling performance_schema (disabled by default in MariaDB10) +performance_schema = 1 +# downsizing performance schema memory usage: T99485 +performance_schema_digests_size = -1 +performance_schema_max_thread_instances = 500 +performance_schema_max_cond_instances = 1000 +performance_schema_accounts_size = 300 +performance_schema_hosts_size = 300 +performance_schema_events_statements_history_size = 10 +performance_schema_events_statements_history_long_size = 1000 +performance_schema_events_waits_history_size = 10 +performance_schema_events_waits_history_long_size = 1000 +performance_schema_events_stages_history_size = 10 +performance_schema_events_stages_history_long_size = 1000 +performance_schema_max_mutex_instances = 5000 +performance_schema_max_rwlock_instances = 2000 +performance_schema_max_socket_instances = 500 +performance_schema_max_table_instances = 1000 +<% else %> +# only enable userstat if p_s is disabled +performance_schema = 0 +userstat = 1 +<% end %> + +[mysqldump] + +quick +max_allowed_packet = 32M + +#!includedir /etc/mysql/conf.d/ -- To view, visit https://gerrit.wikimedia.org/r/332228 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I2e87640257fc58ec16a02c825927129cfd4520eb Gerrit-PatchSet: 4 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Marostegui <maroste...@wikimedia.org> Gerrit-Reviewer: Jcrespo <jcre...@wikimedia.org> 
Gerrit-Reviewer: Marostegui <maroste...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits