Marostegui has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/332228 )
Change subject: WIP: Split dbstore role ...................................................................... WIP: Split dbstore role Maybe it is a good idea to split the dbstore role into two. One for codfw and one for eqiad, at least for now that some of the dbstore servers run tokudb and some others do not. Change-Id: I2e87640257fc58ec16a02c825927129cfd4520eb --- M manifests/site.pp A modules/role/manifests/mariadb/dbstore2.pp A templates/mariadb/dbstore2.my.cnf.erb 3 files changed, 213 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/28/332228/1 diff --git a/manifests/site.pp b/manifests/site.pp index 9bf2cbf..e846586 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -828,7 +828,7 @@ node 'dbstore2001.codfw.wmnet' { # 24h delay on all repl streams - class { 'role::mariadb::dbstore': + class { 'role::mariadb::dbstore2': lag_warn => 90000, lag_crit => 180000, # Delayed slaves legitimately and cleanly (errno = 0) stop the SQL thread, so diff --git a/modules/role/manifests/mariadb/dbstore2.pp b/modules/role/manifests/mariadb/dbstore2.pp new file mode 100644 index 0000000..97268d6 --- /dev/null +++ b/modules/role/manifests/mariadb/dbstore2.pp @@ -0,0 +1,49 @@ +# MariaDB 10 slaves replicating all shards +# These are the slaves in codfw +class role::mariadb::dbstore2( + $lag_warn = 300, + $lag_crit = 600, + $warn_stopped = true, + ) { + + system::role { 'role::mariadb::dbstore2': + description => 'Delayed Slave', + } + + include mariadb::packages_wmf + include mariadb::service + + include standard + include passwords::misc::scripts + + class { 'role::mariadb::grants::production': + password => $passwords::misc::scripts::mysql_root_pass, + prompt => 'DBSTORE', + } + + include role::mariadb::monitor::dba + include passwords::misc::scripts + include role::mariadb::ferm + + class {'role::mariadb::groups': + mysql_group => 'dbstore', + mysql_role => 'slave', + } + + class { 'mariadb::config': + config => 
'mariadb/dbstore2.my.cnf.erb', + datadir => '/srv/sqldata', + tmpdir => '/srv/tmp', + ssl => 'puppet-cert', + p_s => 'off', + } + + mariadb::monitor_replication { + ['s1','s2','s3','s4','s5','s6','s7','m2','m3','x1']: + is_critical => false, + contact_group => 'admins', # only show on nagios/irc + lag_warn => $lag_warn, + lag_crit => $lag_crit, + warn_stopped => $warn_stopped, + } +} diff --git a/templates/mariadb/dbstore2.my.cnf.erb b/templates/mariadb/dbstore2.my.cnf.erb new file mode 100644 index 0000000..80ff776 --- /dev/null +++ b/templates/mariadb/dbstore2.my.cnf.erb @@ -0,0 +1,163 @@ +# dbstore delayed and analytic slaves + +# Please use separate .cnf templates for each type of server. + +[client] +port = 3306 +socket = /tmp/mysql.sock +<% if @ssl == 'on' %> +# ssl +ssl-ca=/etc/mysql/ssl/cacert.pem +ssl-cert=/etc/mysql/ssl/server-cert.pem +ssl-key=/etc/mysql/ssl/server-key.pem +# skip server cert validation until we generate one cert per server +# it would check the cert's common name against the host +# ssl-verify-server-cert +<% elsif @ssl == 'puppet-cert' %> +# ssl +ssl-ca=/etc/ssl/certs/Puppet_Internal_CA.pem +ssl-cert=/etc/mysql/ssl/cert.pem +ssl-key=/etc/mysql/ssl/server.key +ssl-verify-server-cert +<% end %> + +[mysqld] + +skip-external-locking +skip-name-resolve +skip-slave-start +temp-pool + +user = mysql +socket = /tmp/mysql.sock +port = 3306 +datadir = <%= @datadir %> +tmpdir = <%= @tmpdir %> +server_id = <%= @server_id %> +read_only = 0 + +# enable socket authentication +plugin-load = unix_socket=auth_socket.so + +secure_file_priv = /dev/null +max_connections = 250 +max_allowed_packet = 32M +connect_timeout = 3 +query_cache_size = 0 +query_cache_type = 0 +event_scheduler = 1 + +table_open_cache = 50000 +table_definition_cache = 50000 +default-storage-engine = InnoDB + +# InnoDB file-per-table + TokuDB love those file descriptors +open-files-limit = 400000 + +character_set_server = binary +character_set_filesystem = binary +collation_server = 
binary + +innodb_file_per_table = 1 +innodb_buffer_pool_size = 100G +innodb_log_file_size = 4G +innodb_flush_log_at_trx_commit = 0 +innodb_flush_method = O_DIRECT +innodb_thread_concurrency = 0 +innodb_io_capacity = 1000 +innodb_stats_sample_pages = 16 +innodb_stats_method = nulls_unequal +innodb_locks_unsafe_for_binlog = 1 +aria_pagecache_buffer_size = 16G + +# dump and load innodb buffer at start and stop +innodb_buffer_pool_load_at_startup = 1 +innodb_buffer_pool_dump_at_shutdown = 1 + +optimizer_switch='engine_condition_pushdown=on,optimize_join_buffer_size=on' +join_cache_level = 8 + +#plugin-load = ha_tokudb +#tokudb_cache_size = 24G +# Tokudb will stop working when there is less than 1% of free disk space +# (5% by default) +#tokudb_fs_reserve_percent = 1 + +skip-slave-start +slave_transaction_retries = 4294967295 + +s1.replicate-wild-do-table = %wik%.% +s1.replicate-wild-do-table = heartbeat.% +s2.replicate-wild-do-table = %wik%.% +s2.replicate-wild-do-table = heartbeat.% +s3.replicate-wild-do-table = %wik%.% +s3.replicate-wild-do-table = heartbeat.% +s4.replicate-wild-do-table = %wik%.% +s4.replicate-wild-do-table = heartbeat.% +s5.replicate-wild-do-table = %wik%.% +s5.replicate-wild-do-table = heartbeat.% +s6.replicate-wild-do-table = %wik%.% +s6.replicate-wild-do-table = heartbeat.% +s7.replicate-wild-do-table = %wik%.% +s7.replicate-wild-do-table = centralauth.% +s7.replicate-wild-do-table = heartbeat.% +m3.replicate-wild-do-table = phab%.% +m3.replicate-wild-do-table = phlegal%.% +m3.replicate-wild-do-table = heartbeat.% +m4.replicate-wild-do-table = log.% +m4.replicate-wild-do-table = heartbeat.% +x1.replicate-wild-do-table = flowdb.% +x1.replicate-wild-do-table = wikishared.% +x1.replicate-wild-do-table = heartbeat.% + +<% if @kernelversion < "3.19" %> +# Until kernel 3.16 http://www.spinics.net/lists/stable/msg61873.html +# At least, I think so. 
For now we need to avoid an assertion failure on Trusty w/ 3.13 +innodb_use_native_aio = 0 +innodb_read_io_threads = 16 +innodb_write_io_threads = 8 +<% end %> +<% if @ssl == 'on' %> +# ssl +ssl-ca=/etc/mysql/ssl/cacert.pem +ssl-cert=/etc/mysql/ssl/server-cert.pem +ssl-key=/etc/mysql/ssl/server-key.pem +ssl-cipher=TLSv1.2 +<% elsif @ssl == 'puppet-cert' %> +# ssl +ssl-ca=/etc/ssl/certs/Puppet_Internal_CA.pem +ssl-cert=/etc/mysql/ssl/cert.pem +ssl-key=/etc/mysql/ssl/server.key +ssl-cipher=TLSv1.2 +<% end %> +<% if @p_s == 'on' %> +# Enabling performance_schema (disabled by default in MariaDB10) +performance_schema = 1 +# downsizing performance schema memory usage: T99485 +performance_schema_digests_size = -1 +performance_schema_max_thread_instances = 500 +performance_schema_max_cond_instances = 1000 +performance_schema_accounts_size = 300 +performance_schema_hosts_size = 300 +performance_schema_events_statements_history_size = 10 +performance_schema_events_statements_history_long_size = 1000 +performance_schema_events_waits_history_size = 10 +performance_schema_events_waits_history_long_size = 1000 +performance_schema_events_stages_history_size = 10 +performance_schema_events_stages_history_long_size = 1000 +performance_schema_max_mutex_instances = 5000 +performance_schema_max_rwlock_instances = 2000 +performance_schema_max_socket_instances = 500 +performance_schema_max_table_instances = 1000 +<% else %> +# only enable userstat if p_s is disabled +performance_schema = 0 +userstat = 1 +<% end %> + +[mysqldump] + +quick +max_allowed_packet = 32M + +#!includedir /etc/mysql/conf.d/ -- To view, visit https://gerrit.wikimedia.org/r/332228 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2e87640257fc58ec16a02c825927129cfd4520eb Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Marostegui <maroste...@wikimedia.org> _______________________________________________ 
MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits