Partially revert this commit:
commit 6a1cb449feb1b77e5ec94904c228d7c5477f528a
Author: Sergei Golubchik <[email protected]>
Date: Mon Jan 18 18:02:16 2021 +0100
cleanup: remove slave background thread, use handle_manager thread instead
This restores running the parallel replication deadlock killing in its own
dedicated thread, not in the manager thread shared with other unrelated
processing.
When a parallel replication conflict is detected, multiple threads can be
waiting for each other, potentially in a loop. It is critical for
correctness (as well as performance) that the blocking thread is killed
immediately to allow other threads to continue. If one of the threads being
blocked was the manager thread itself in some unrelated job, the kill could
end up being blocked indefinitely, causing replication to hang, usually
eventually timing out on innodb_lock_wait_timeout and failing replication
with an error like:
[ERROR] Slave worker thread retried transaction 10 time(s) in vain, giving up
Signed-off-by: Kristian Nielsen <[email protected]>
---
.../suite/perfschema/r/threads_mysql.result | 12 ++
.../r/pr_ps_setup_show_enabled.result | 2 +
sql/mysqld.cc | 20 ++-
sql/mysqld.h | 6 +-
sql/slave.cc | 138 ++++++++++++++++--
5 files changed, 159 insertions(+), 19 deletions(-)
diff --git a/mysql-test/suite/perfschema/r/threads_mysql.result
b/mysql-test/suite/perfschema/r/threads_mysql.result
index 20c119da31f..64c48e4e878 100644
--- a/mysql-test/suite/perfschema/r/threads_mysql.result
+++ b/mysql-test/suite/perfschema/r/threads_mysql.result
@@ -58,6 +58,17 @@ connection_type NULL
unified_parent_thread_id unified parent_thread_id
role NULL
instrumented YES
+name thread/sql/slave_deadlock_handler
+type BACKGROUND
+processlist_user NULL
+processlist_host NULL
+processlist_db NULL
+processlist_command NULL
+processlist_info NULL
+connection_type NULL
+unified_parent_thread_id unified parent_thread_id
+role NULL
+instrumented YES
CREATE TEMPORARY TABLE t1 AS
SELECT thread_id FROM performance_schema.threads
WHERE name LIKE 'thread/sql%';
@@ -120,4 +131,5 @@ thread/sql/event_scheduler thread/sql/event_worker
thread/sql/main thread/sql/manager
thread/sql/main thread/sql/one_connection
thread/sql/main thread/sql/signal_handler
+thread/sql/main thread/sql/slave_deadlock_handler
thread/sql/one_connection thread/sql/event_scheduler
diff --git a/mysql-test/suite/sysschema/r/pr_ps_setup_show_enabled.result
b/mysql-test/suite/sysschema/r/pr_ps_setup_show_enabled.result
index 261ccb191ed..8ba5f273767 100644
--- a/mysql-test/suite/sysschema/r/pr_ps_setup_show_enabled.result
+++ b/mysql-test/suite/sysschema/r/pr_ps_setup_show_enabled.result
@@ -209,6 +209,7 @@ mysys/statement_timer BACKGROUND
root@localhost FOREGROUND
sql/main BACKGROUND
sql/manager BACKGROUND
+sql/slave_deadlock_handler BACKGROUND
CALL sys.ps_setup_show_enabled(TRUE, TRUE);
performance_schema_enabled
1
@@ -240,6 +241,7 @@ mysys/statement_timer BACKGROUND
root@localhost FOREGROUND
sql/main BACKGROUND
sql/manager BACKGROUND
+sql/slave_deadlock_handler BACKGROUND
enabled_instruments timed
idle YES
memory/performance_schema/accounts NO
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 5707ce90349..f4f0ba33938 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -346,6 +346,7 @@ static char compiled_default_collation_name[]=
MYSQL_DEFAULT_COLLATION_NAME;
Thread_cache thread_cache;
static bool binlog_format_used= false;
LEX_STRING opt_init_connect, opt_init_slave;
+mysql_cond_t COND_slave_deadlock_handler;
static DYNAMIC_ARRAY all_options;
static longlong start_memory_used;
@@ -697,7 +698,8 @@ mysql_mutex_t
LOCK_crypt,
LOCK_global_system_variables,
LOCK_user_conn,
- LOCK_error_messages;
+ LOCK_error_messages,
+ LOCK_slave_deadlock_handler;
mysql_mutex_t LOCK_stats, LOCK_global_user_client_stats,
LOCK_global_table_stats, LOCK_global_index_stats;
@@ -920,7 +922,8 @@ PSI_mutex_key key_LOCK_stats,
PSI_mutex_key key_LOCK_gtid_waiting;
PSI_mutex_key key_LOCK_after_binlog_sync;
-PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered;
+PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered,
+ key_LOCK_slave_deadlock_handler;
PSI_mutex_key key_TABLE_SHARE_LOCK_share;
PSI_mutex_key key_TABLE_SHARE_LOCK_statistics;
PSI_mutex_key key_LOCK_ack_receiver;
@@ -996,6 +999,7 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOCK_prepare_ordered, "LOCK_prepare_ordered", PSI_FLAG_GLOBAL},
{ &key_LOCK_after_binlog_sync, "LOCK_after_binlog_sync", PSI_FLAG_GLOBAL},
{ &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL},
+ { &key_LOCK_slave_deadlock_handler, "LOCK_slave_deadlock_handler",
PSI_FLAG_GLOBAL},
{ &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0},
{ &key_LOCK_slave_state, "LOCK_slave_state", 0},
{ &key_LOCK_start_thread, "LOCK_start_thread", PSI_FLAG_GLOBAL},
@@ -1064,7 +1068,7 @@ PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy;
PSI_cond_key key_COND_rpl_thread_queue, key_COND_rpl_thread,
key_COND_rpl_thread_stop, key_COND_rpl_thread_pool,
key_COND_parallel_entry, key_COND_group_commit_orderer,
- key_COND_prepare_ordered;
+ key_COND_prepare_ordered, key_COND_slave_deadlock_handler;
PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates;
PSI_cond_key key_COND_ack_receiver;
@@ -1110,6 +1114,7 @@ static PSI_cond_info all_server_conds[]=
{ &key_COND_parallel_entry, "COND_parallel_entry", 0},
{ &key_COND_group_commit_orderer, "COND_group_commit_orderer", 0},
{ &key_COND_prepare_ordered, "COND_prepare_ordered", 0},
+ { &key_COND_slave_deadlock_handler, "COND_slave_deadlock_handler", 0},
{ &key_COND_start_thread, "COND_start_thread", PSI_FLAG_GLOBAL},
{ &key_COND_wait_gtid, "COND_wait_gtid", 0},
{ &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0},
@@ -1121,7 +1126,7 @@ static PSI_cond_info all_server_conds[]=
PSI_thread_key key_thread_delayed_insert,
key_thread_handle_manager, key_thread_main,
key_thread_one_connection, key_thread_signal_hand,
- key_thread_slave_background, key_rpl_parallel_thread;
+ key_thread_slave_deadlock_handler, key_rpl_parallel_thread;
PSI_thread_key key_thread_ack_receiver;
static PSI_thread_info all_server_threads[]=
@@ -1131,7 +1136,7 @@ static PSI_thread_info all_server_threads[]=
{ &key_thread_main, "main", PSI_FLAG_GLOBAL},
{ &key_thread_one_connection, "one_connection", 0},
{ &key_thread_signal_hand, "signal_handler", PSI_FLAG_GLOBAL},
- { &key_thread_slave_background, "slave_background", PSI_FLAG_GLOBAL},
+ { &key_thread_slave_deadlock_handler, "slave_deadlock_handler",
PSI_FLAG_GLOBAL},
{ &key_thread_ack_receiver, "Ack_receiver", PSI_FLAG_GLOBAL},
{ &key_rpl_parallel_thread, "rpl_parallel_thread", 0}
};
@@ -2115,6 +2120,8 @@ static void clean_up_mutexes()
mysql_cond_destroy(&COND_prepare_ordered);
mysql_mutex_destroy(&LOCK_after_binlog_sync);
mysql_mutex_destroy(&LOCK_commit_ordered);
+ mysql_mutex_destroy(&LOCK_slave_deadlock_handler);
+ mysql_cond_destroy(&COND_slave_deadlock_handler);
#ifndef EMBEDDED_LIBRARY
mysql_mutex_destroy(&LOCK_error_log);
#endif
@@ -4385,6 +4392,9 @@ static int init_thread_environment()
MY_MUTEX_INIT_SLOW);
mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered,
MY_MUTEX_INIT_SLOW);
+ mysql_mutex_init(key_LOCK_slave_deadlock_handler,
&LOCK_slave_deadlock_handler,
+ MY_MUTEX_INIT_SLOW);
+ mysql_cond_init(key_COND_slave_deadlock_handler,
&COND_slave_deadlock_handler, NULL);
mysql_mutex_init(key_LOCK_backup_log, &LOCK_backup_log, MY_MUTEX_INIT_FAST);
#ifdef HAVE_OPENSSL
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 41dea67707d..48993a9eb78 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -384,7 +384,7 @@ extern PSI_cond_key key_TABLE_SHARE_COND_rotation;
extern PSI_thread_key key_thread_delayed_insert,
key_thread_handle_manager, key_thread_kill_server, key_thread_main,
key_thread_one_connection, key_thread_signal_hand,
- key_thread_slave_background, key_rpl_parallel_thread;
+ key_thread_slave_deadlock_handler, key_rpl_parallel_thread;
extern PSI_file_key key_file_binlog, key_file_binlog_cache,
key_file_binlog_index, key_file_binlog_index_cache, key_file_casetest,
@@ -753,7 +753,8 @@ extern mysql_mutex_t
LOCK_error_log, LOCK_delayed_insert, LOCK_short_uuid_generator,
LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
LOCK_active_mi, LOCK_manager, LOCK_user_conn,
- LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_backup_log;
+ LOCK_prepared_stmt_count, LOCK_error_messages,
+ LOCK_slave_deadlock_handler, LOCK_backup_log;
extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_global_system_variables;
extern mysql_rwlock_t LOCK_all_status_vars;
extern mysql_mutex_t LOCK_start_thread;
@@ -764,6 +765,7 @@ extern mysql_rwlock_t LOCK_ssl_refresh;
extern mysql_prlock_t LOCK_system_variables_hash;
extern mysql_cond_t COND_start_thread;
extern mysql_cond_t COND_manager;
+extern mysql_cond_t COND_slave_deadlock_handler;
extern my_bool opt_use_ssl;
extern char *opt_ssl_ca, *opt_ssl_capath, *opt_ssl_cert, *opt_ssl_cipher,
diff --git a/sql/slave.cc b/sql/slave.cc
index de1476e5925..32817e58e45 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -479,8 +479,114 @@ static void bg_gtid_pos_auto_create(void *hton)
plugin_unlock(NULL, engine);
}
+static bool slave_deadlock_handler_thread_running;
+static bool slave_deadlock_handler_thread_stop;
static bool slave_background_thread_gtid_loaded;
+struct slave_deadlock_kill_t {
+ slave_deadlock_kill_t *next;
+ THD *to_kill;
+} *slave_deadlock_kill_list;
+
+
+pthread_handler_t
+slave_deadlock_handler(void *arg __attribute__((unused)))
+{
+ bool stop;
+
+ my_thread_init();
+
+ mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+ slave_deadlock_handler_thread_running= true;
+ mysql_cond_broadcast(&COND_slave_deadlock_handler);
+ do
+ {
+ slave_deadlock_kill_t *kill_list;
+
+ for (;;)
+ {
+ stop= abort_loop || slave_deadlock_handler_thread_stop;
+ kill_list= slave_deadlock_kill_list;
+ if (stop || kill_list)
+ break;
+ mysql_cond_wait(&COND_slave_deadlock_handler,
&LOCK_slave_deadlock_handler);
+ }
+
+ slave_deadlock_kill_list= NULL;
+ mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+
+ while (kill_list)
+ {
+ slave_deadlock_kill_t *p = kill_list;
+ THD *to_kill= p->to_kill;
+ kill_list= p->next;
+
+ DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000););
+ to_kill->awake(KILL_CONNECTION);
+ mysql_mutex_lock(&to_kill->LOCK_wakeup_ready);
+ to_kill->rgi_slave->killed_for_retry=
+ rpl_group_info::RETRY_KILL_KILLED;
+ mysql_cond_broadcast(&to_kill->COND_wakeup_ready);
+ mysql_mutex_unlock(&to_kill->LOCK_wakeup_ready);
+ my_free(p);
+ }
+ mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+ } while (!stop);
+
+ slave_deadlock_handler_thread_running= false;
+ mysql_cond_broadcast(&COND_slave_deadlock_handler);
+ mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+
+ my_thread_end();
+ return 0;
+}
+
+
+/*
+ Start the slave deadlock handler thread.
+
+ This thread is used to kill worker thread transactions during parallel
+ replication, when a storage engine attempts to take an errorneous
+ conflicting lock that would cause a deadlock. Killing is done
+ asynchroneously, as the kill may not be safe within the context of a
+ callback from inside storage engine locking code.
+*/
+static int
+start_slave_deadlock_handler_thread()
+{
+ pthread_t th;
+
+ mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+ slave_deadlock_handler_thread_running= false;
+ slave_deadlock_handler_thread_stop= false;
+ if (mysql_thread_create(key_thread_slave_deadlock_handler,
+ &th, &connection_attrib, slave_deadlock_handler,
+ NULL))
+ {
+ sql_print_error("Failed to create thread while initialising slave");
+ return 1;
+ }
+
+ while (!slave_deadlock_handler_thread_running)
+ mysql_cond_wait(&COND_slave_deadlock_handler,
&LOCK_slave_deadlock_handler);
+ mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+
+ return 0;
+}
+
+
+static void
+stop_slave_deadlock_handler_thread()
+{
+ mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+ slave_deadlock_handler_thread_stop= true;
+ mysql_cond_broadcast(&COND_slave_deadlock_handler);
+ while (slave_deadlock_handler_thread_running)
+ mysql_cond_wait(&COND_slave_deadlock_handler,
&LOCK_slave_deadlock_handler);
+ mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+}
+
+
static void bg_rpl_load_gtid_slave_state(void *)
{
THD *thd= new_bg_THD();
@@ -501,25 +607,27 @@ static void bg_rpl_load_gtid_slave_state(void *)
delete thd;
}
-static void bg_slave_kill(void *victim)
-{
- THD *to_kill= (THD *)victim;
- DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000););
- to_kill->awake(KILL_CONNECTION);
- mysql_mutex_lock(&to_kill->LOCK_wakeup_ready);
- to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
- mysql_cond_broadcast(&to_kill->COND_wakeup_ready);
- mysql_mutex_unlock(&to_kill->LOCK_wakeup_ready);
-}
void slave_background_kill_request(THD *to_kill)
{
if (to_kill->rgi_slave->killed_for_retry)
return; // Already deadlock killed.
- to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_PENDING;
- mysql_manager_submit(bg_slave_kill, to_kill);
+ slave_deadlock_kill_t *p= (slave_deadlock_kill_t *)
+ my_malloc(PSI_INSTRUMENT_ME, sizeof(*p), MYF(MY_WME));
+ if (p)
+ {
+ p->to_kill= to_kill;
+ to_kill->rgi_slave->killed_for_retry=
+ rpl_group_info::RETRY_KILL_PENDING;
+ mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+ p->next= slave_deadlock_kill_list;
+ slave_deadlock_kill_list= p;
+ mysql_cond_signal(&COND_slave_deadlock_handler);
+ mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+ }
}
+
/*
This function must only be called from a slave SQL thread (or worker thread),
to ensure that the table_entry will not go away before we can lock the
@@ -564,6 +672,9 @@ int init_slave()
init_slave_psi_keys();
#endif
+ if (start_slave_deadlock_handler_thread())
+ return 1;
+
if (global_rpl_thread_pool.init(opt_slave_parallel_threads))
return 1;
@@ -1298,6 +1409,7 @@ void slave_prepare_for_shutdown()
mysql_mutex_lock(&LOCK_active_mi);
master_info_index->free_connections();
mysql_mutex_unlock(&LOCK_active_mi);
+ stop_slave_deadlock_handler_thread();
// It's safe to destruct worker pool now when
// all driver threads are gone.
global_rpl_thread_pool.deactivate();
@@ -1331,6 +1443,8 @@ void end_slave()
active_mi= 0;
mysql_mutex_unlock(&LOCK_active_mi);
+ stop_slave_deadlock_handler_thread();
+
global_rpl_thread_pool.destroy();
free_all_rpl_filters();
DBUG_VOID_RETURN;
--
2.47.3
_______________________________________________
commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]