Partially revert this commit:

commit 6a1cb449feb1b77e5ec94904c228d7c5477f528a
Author: Sergei Golubchik <[email protected]>
Date:   Mon Jan 18 18:02:16 2021 +0100

    cleanup: remove slave background thread, use handle_manager thread instead

This restores running the parallel replication deadlock killing in its own
dedicated thread, not in the manager thread shared with other unrelated
processing.

When a parallel replication conflict is detected, multiple threads can be
waiting for each other, potentially in a loop. It is critical for
correctness (as well as performance) that the blocking thread is killed
immediately to allow other threads to continue. If one of the threads being
blocked was the manager thread itself in some unrelated job, the kill could
end up being blocked indefinitely, causing replication to hang, usually
eventually timing out on innodb_lock_wait_timeout and failing replication
with an error like:

[ERROR] Slave worker thread retried transaction 10 time(s) in vain, giving up

Signed-off-by: Kristian Nielsen <[email protected]>
---
 .../suite/perfschema/r/threads_mysql.result   |  12 ++
 .../r/pr_ps_setup_show_enabled.result         |   2 +
 sql/mysqld.cc                                 |  20 ++-
 sql/mysqld.h                                  |   6 +-
 sql/slave.cc                                  | 138 ++++++++++++++++--
 5 files changed, 159 insertions(+), 19 deletions(-)

diff --git a/mysql-test/suite/perfschema/r/threads_mysql.result 
b/mysql-test/suite/perfschema/r/threads_mysql.result
index 20c119da31f..64c48e4e878 100644
--- a/mysql-test/suite/perfschema/r/threads_mysql.result
+++ b/mysql-test/suite/perfschema/r/threads_mysql.result
@@ -58,6 +58,17 @@ connection_type      NULL
 unified_parent_thread_id       unified parent_thread_id
 role   NULL
 instrumented   YES
+name   thread/sql/slave_deadlock_handler
+type   BACKGROUND
+processlist_user       NULL
+processlist_host       NULL
+processlist_db NULL
+processlist_command    NULL
+processlist_info       NULL
+connection_type        NULL
+unified_parent_thread_id       unified parent_thread_id
+role   NULL
+instrumented   YES
 CREATE TEMPORARY TABLE t1 AS
 SELECT thread_id FROM performance_schema.threads
 WHERE name LIKE 'thread/sql%';
@@ -120,4 +131,5 @@ thread/sql/event_scheduler  thread/sql/event_worker
 thread/sql/main        thread/sql/manager
 thread/sql/main        thread/sql/one_connection
 thread/sql/main        thread/sql/signal_handler
+thread/sql/main        thread/sql/slave_deadlock_handler
 thread/sql/one_connection      thread/sql/event_scheduler
diff --git a/mysql-test/suite/sysschema/r/pr_ps_setup_show_enabled.result 
b/mysql-test/suite/sysschema/r/pr_ps_setup_show_enabled.result
index 261ccb191ed..8ba5f273767 100644
--- a/mysql-test/suite/sysschema/r/pr_ps_setup_show_enabled.result
+++ b/mysql-test/suite/sysschema/r/pr_ps_setup_show_enabled.result
@@ -209,6 +209,7 @@ mysys/statement_timer       BACKGROUND
 root@localhost FOREGROUND
 sql/main       BACKGROUND
 sql/manager    BACKGROUND
+sql/slave_deadlock_handler     BACKGROUND
 CALL sys.ps_setup_show_enabled(TRUE, TRUE);
 performance_schema_enabled
 1
@@ -240,6 +241,7 @@ mysys/statement_timer       BACKGROUND
 root@localhost FOREGROUND
 sql/main       BACKGROUND
 sql/manager    BACKGROUND
+sql/slave_deadlock_handler     BACKGROUND
 enabled_instruments    timed
 idle   YES
 memory/performance_schema/accounts     NO
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 5707ce90349..f4f0ba33938 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -346,6 +346,7 @@ static char compiled_default_collation_name[]= 
MYSQL_DEFAULT_COLLATION_NAME;
 Thread_cache thread_cache;
 static bool binlog_format_used= false;
 LEX_STRING opt_init_connect, opt_init_slave;
+mysql_cond_t COND_slave_deadlock_handler;
 static DYNAMIC_ARRAY all_options;
 static longlong start_memory_used;
 
@@ -697,7 +698,8 @@ mysql_mutex_t
   LOCK_crypt,
   LOCK_global_system_variables,
   LOCK_user_conn,
-  LOCK_error_messages;
+  LOCK_error_messages,
+  LOCK_slave_deadlock_handler;
 mysql_mutex_t LOCK_stats, LOCK_global_user_client_stats,
               LOCK_global_table_stats, LOCK_global_index_stats;
 
@@ -920,7 +922,8 @@ PSI_mutex_key key_LOCK_stats,
 PSI_mutex_key key_LOCK_gtid_waiting;
 
 PSI_mutex_key key_LOCK_after_binlog_sync;
-PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered;
+PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered,
+  key_LOCK_slave_deadlock_handler;
 PSI_mutex_key key_TABLE_SHARE_LOCK_share;
 PSI_mutex_key key_TABLE_SHARE_LOCK_statistics;
 PSI_mutex_key key_LOCK_ack_receiver;
@@ -996,6 +999,7 @@ static PSI_mutex_info all_server_mutexes[]=
   { &key_LOCK_prepare_ordered, "LOCK_prepare_ordered", PSI_FLAG_GLOBAL},
   { &key_LOCK_after_binlog_sync, "LOCK_after_binlog_sync", PSI_FLAG_GLOBAL},
   { &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL},
+  { &key_LOCK_slave_deadlock_handler, "LOCK_slave_deadlock_handler", 
PSI_FLAG_GLOBAL},
   { &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0},
   { &key_LOCK_slave_state, "LOCK_slave_state", 0},
   { &key_LOCK_start_thread, "LOCK_start_thread", PSI_FLAG_GLOBAL},
@@ -1064,7 +1068,7 @@ PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy;
 PSI_cond_key key_COND_rpl_thread_queue, key_COND_rpl_thread,
   key_COND_rpl_thread_stop, key_COND_rpl_thread_pool,
   key_COND_parallel_entry, key_COND_group_commit_orderer,
-  key_COND_prepare_ordered;
+  key_COND_prepare_ordered, key_COND_slave_deadlock_handler;
 PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates;
 PSI_cond_key key_COND_ack_receiver;
 
@@ -1110,6 +1114,7 @@ static PSI_cond_info all_server_conds[]=
   { &key_COND_parallel_entry, "COND_parallel_entry", 0},
   { &key_COND_group_commit_orderer, "COND_group_commit_orderer", 0},
   { &key_COND_prepare_ordered, "COND_prepare_ordered", 0},
+  { &key_COND_slave_deadlock_handler, "COND_slave_deadlock_handler", 0},
   { &key_COND_start_thread, "COND_start_thread", PSI_FLAG_GLOBAL},
   { &key_COND_wait_gtid, "COND_wait_gtid", 0},
   { &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0},
@@ -1121,7 +1126,7 @@ static PSI_cond_info all_server_conds[]=
 PSI_thread_key key_thread_delayed_insert,
   key_thread_handle_manager, key_thread_main,
   key_thread_one_connection, key_thread_signal_hand,
-  key_thread_slave_background, key_rpl_parallel_thread;
+  key_thread_slave_deadlock_handler, key_rpl_parallel_thread;
 PSI_thread_key key_thread_ack_receiver;
 
 static PSI_thread_info all_server_threads[]=
@@ -1131,7 +1136,7 @@ static PSI_thread_info all_server_threads[]=
   { &key_thread_main, "main", PSI_FLAG_GLOBAL},
   { &key_thread_one_connection, "one_connection", 0},
   { &key_thread_signal_hand, "signal_handler", PSI_FLAG_GLOBAL},
-  { &key_thread_slave_background, "slave_background", PSI_FLAG_GLOBAL},
+  { &key_thread_slave_deadlock_handler, "slave_deadlock_handler", 
PSI_FLAG_GLOBAL},
   { &key_thread_ack_receiver, "Ack_receiver", PSI_FLAG_GLOBAL},
   { &key_rpl_parallel_thread, "rpl_parallel_thread", 0}
 };
@@ -2115,6 +2120,8 @@ static void clean_up_mutexes()
   mysql_cond_destroy(&COND_prepare_ordered);
   mysql_mutex_destroy(&LOCK_after_binlog_sync);
   mysql_mutex_destroy(&LOCK_commit_ordered);
+  mysql_mutex_destroy(&LOCK_slave_deadlock_handler);
+  mysql_cond_destroy(&COND_slave_deadlock_handler);
 #ifndef EMBEDDED_LIBRARY
   mysql_mutex_destroy(&LOCK_error_log);
 #endif
@@ -4385,6 +4392,9 @@ static int init_thread_environment()
                    MY_MUTEX_INIT_SLOW);
   mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered,
                    MY_MUTEX_INIT_SLOW);
+  mysql_mutex_init(key_LOCK_slave_deadlock_handler, 
&LOCK_slave_deadlock_handler,
+                   MY_MUTEX_INIT_SLOW);
+  mysql_cond_init(key_COND_slave_deadlock_handler, 
&COND_slave_deadlock_handler, NULL);
   mysql_mutex_init(key_LOCK_backup_log, &LOCK_backup_log, MY_MUTEX_INIT_FAST);
 
 #ifdef HAVE_OPENSSL
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 41dea67707d..48993a9eb78 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -384,7 +384,7 @@ extern PSI_cond_key key_TABLE_SHARE_COND_rotation;
 extern PSI_thread_key key_thread_delayed_insert,
   key_thread_handle_manager, key_thread_kill_server, key_thread_main,
   key_thread_one_connection, key_thread_signal_hand,
-  key_thread_slave_background, key_rpl_parallel_thread;
+  key_thread_slave_deadlock_handler, key_rpl_parallel_thread;
 
 extern PSI_file_key key_file_binlog, key_file_binlog_cache,
        key_file_binlog_index, key_file_binlog_index_cache, key_file_casetest,
@@ -753,7 +753,8 @@ extern mysql_mutex_t
        LOCK_error_log, LOCK_delayed_insert, LOCK_short_uuid_generator,
        LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
        LOCK_active_mi, LOCK_manager, LOCK_user_conn,
-       LOCK_prepared_stmt_count, LOCK_error_messages,  LOCK_backup_log;
+       LOCK_prepared_stmt_count, LOCK_error_messages,
+       LOCK_slave_deadlock_handler, LOCK_backup_log;
 extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_global_system_variables;
 extern mysql_rwlock_t LOCK_all_status_vars;
 extern mysql_mutex_t LOCK_start_thread;
@@ -764,6 +765,7 @@ extern mysql_rwlock_t LOCK_ssl_refresh;
 extern mysql_prlock_t LOCK_system_variables_hash;
 extern mysql_cond_t COND_start_thread;
 extern mysql_cond_t COND_manager;
+extern mysql_cond_t COND_slave_deadlock_handler;
 
 extern my_bool opt_use_ssl;
 extern char *opt_ssl_ca, *opt_ssl_capath, *opt_ssl_cert, *opt_ssl_cipher,
diff --git a/sql/slave.cc b/sql/slave.cc
index de1476e5925..32817e58e45 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -479,8 +479,114 @@ static void bg_gtid_pos_auto_create(void *hton)
     plugin_unlock(NULL, engine);
 }
 
+static bool slave_deadlock_handler_thread_running;
+static bool slave_deadlock_handler_thread_stop;
 static bool slave_background_thread_gtid_loaded;
 
+struct slave_deadlock_kill_t {
+  slave_deadlock_kill_t *next;
+  THD *to_kill;
+} *slave_deadlock_kill_list;
+
+
+pthread_handler_t
+slave_deadlock_handler(void *arg __attribute__((unused)))
+{
+  bool stop;
+
+  my_thread_init();
+
+  mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+  slave_deadlock_handler_thread_running= true;
+  mysql_cond_broadcast(&COND_slave_deadlock_handler);
+  do
+  {
+    slave_deadlock_kill_t *kill_list;
+
+    for (;;)
+    {
+      stop= abort_loop || slave_deadlock_handler_thread_stop;
+      kill_list= slave_deadlock_kill_list;
+      if (stop || kill_list)
+        break;
+      mysql_cond_wait(&COND_slave_deadlock_handler, 
&LOCK_slave_deadlock_handler);
+    }
+
+    slave_deadlock_kill_list= NULL;
+    mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+
+    while (kill_list)
+    {
+      slave_deadlock_kill_t *p = kill_list;
+      THD *to_kill= p->to_kill;
+      kill_list= p->next;
+
+      DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000););
+      to_kill->awake(KILL_CONNECTION);
+      mysql_mutex_lock(&to_kill->LOCK_wakeup_ready);
+      to_kill->rgi_slave->killed_for_retry=
+        rpl_group_info::RETRY_KILL_KILLED;
+      mysql_cond_broadcast(&to_kill->COND_wakeup_ready);
+      mysql_mutex_unlock(&to_kill->LOCK_wakeup_ready);
+      my_free(p);
+    }
+    mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+  } while (!stop);
+
+  slave_deadlock_handler_thread_running= false;
+  mysql_cond_broadcast(&COND_slave_deadlock_handler);
+  mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+
+  my_thread_end();
+  return 0;
+}
+
+
+/*
+  Start the slave deadlock handler thread.
+
+  This thread is used to kill worker thread transactions during parallel
+  replication, when a storage engine attempts to take an errorneous
+  conflicting lock that would cause a deadlock. Killing is done
+  asynchroneously, as the kill may not be safe within the context of a
+  callback from inside storage engine locking code.
+*/
+static int
+start_slave_deadlock_handler_thread()
+{
+  pthread_t th;
+
+  mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+  slave_deadlock_handler_thread_running= false;
+  slave_deadlock_handler_thread_stop= false;
+  if (mysql_thread_create(key_thread_slave_deadlock_handler,
+                          &th, &connection_attrib, slave_deadlock_handler,
+                          NULL))
+  {
+    sql_print_error("Failed to create thread while initialising slave");
+    return 1;
+  }
+
+  while (!slave_deadlock_handler_thread_running)
+    mysql_cond_wait(&COND_slave_deadlock_handler, 
&LOCK_slave_deadlock_handler);
+  mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+
+  return 0;
+}
+
+
+static void
+stop_slave_deadlock_handler_thread()
+{
+  mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+  slave_deadlock_handler_thread_stop= true;
+  mysql_cond_broadcast(&COND_slave_deadlock_handler);
+  while (slave_deadlock_handler_thread_running)
+    mysql_cond_wait(&COND_slave_deadlock_handler, 
&LOCK_slave_deadlock_handler);
+  mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+}
+
+
 static void bg_rpl_load_gtid_slave_state(void *)
 {
   THD *thd= new_bg_THD();
@@ -501,25 +607,27 @@ static void bg_rpl_load_gtid_slave_state(void *)
   delete thd;
 }
 
-static void bg_slave_kill(void *victim)
-{
-  THD *to_kill= (THD *)victim;
-  DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000););
-  to_kill->awake(KILL_CONNECTION);
-  mysql_mutex_lock(&to_kill->LOCK_wakeup_ready);
-  to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
-  mysql_cond_broadcast(&to_kill->COND_wakeup_ready);
-  mysql_mutex_unlock(&to_kill->LOCK_wakeup_ready);
-}
 
 void slave_background_kill_request(THD *to_kill)
 {
   if (to_kill->rgi_slave->killed_for_retry)
     return;                                     // Already deadlock killed.
-  to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_PENDING;
-  mysql_manager_submit(bg_slave_kill, to_kill);
+  slave_deadlock_kill_t *p= (slave_deadlock_kill_t *)
+    my_malloc(PSI_INSTRUMENT_ME, sizeof(*p), MYF(MY_WME));
+  if (p)
+  {
+    p->to_kill= to_kill;
+    to_kill->rgi_slave->killed_for_retry=
+      rpl_group_info::RETRY_KILL_PENDING;
+    mysql_mutex_lock(&LOCK_slave_deadlock_handler);
+    p->next= slave_deadlock_kill_list;
+    slave_deadlock_kill_list= p;
+    mysql_cond_signal(&COND_slave_deadlock_handler);
+    mysql_mutex_unlock(&LOCK_slave_deadlock_handler);
+  }
 }
 
+
 /*
   This function must only be called from a slave SQL thread (or worker thread),
   to ensure that the table_entry will not go away before we can lock the
@@ -564,6 +672,9 @@ int init_slave()
   init_slave_psi_keys();
 #endif
 
+  if (start_slave_deadlock_handler_thread())
+    return 1;
+
   if (global_rpl_thread_pool.init(opt_slave_parallel_threads))
     return 1;
 
@@ -1298,6 +1409,7 @@ void slave_prepare_for_shutdown()
   mysql_mutex_lock(&LOCK_active_mi);
   master_info_index->free_connections();
   mysql_mutex_unlock(&LOCK_active_mi);
+  stop_slave_deadlock_handler_thread();
   // It's safe to destruct worker pool now when
   // all driver threads are gone.
   global_rpl_thread_pool.deactivate();
@@ -1331,6 +1443,8 @@ void end_slave()
   active_mi= 0;
   mysql_mutex_unlock(&LOCK_active_mi);
 
+  stop_slave_deadlock_handler_thread();
+
   global_rpl_thread_pool.destroy();
   free_all_rpl_filters();
   DBUG_VOID_RETURN;
-- 
2.47.3

_______________________________________________
commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to