Re: [Maria-developers] [Fixed Commit] MDEV-12746 out-of-order retry

2018-02-17 Thread Kristian Nielsen
andrei.el...@pp.inet.fi writes:

> The improved patch is here for more comments if you will have.

The new patch looks fine to me.

 - Kristian.

___
Mailing list: https://launchpad.net/~maria-developers
Post to : maria-developers@lists.launchpad.net
Unsubscribe : https://launchpad.net/~maria-developers
More help   : https://help.launchpad.net/ListHelp


[Maria-developers] [Fixed Commit] MDEV-12746 out-of-order retry

2018-02-14 Thread andrei . elkin
Kristian,

The improved patch is here for more comments if you will have.

Cheers,

Andrei



commit 564a4efc817f9f6c54da00987e4aa5164f4c0d97
Author: Andrei Elkin 
Date:   Fri Feb 9 15:00:23 2018 +0200

MDEV-12746 rpl.rpl_parallel_optimistic_nobinlog fails committing
   out of order at retry

The test failures were of two sorts. One is that the number of retries
what the slave thought as a temporary error exceeded
the default value of the slave retry option.
The 2nd issue was an out of order commit by transactions that
were supposed to error out instead.
Both issues are caused by the same reason that the post-temporary-error
retry did not check possibly already existing error status.

This is mended with refining conditions to retry. Specifically, a retrying
worker checks `rpl_parallel_entry::stop_on_error_sub_id` that
a potential failing predecessor could set to its own sub id.
Now should the member be set the retrying follower errors out with
ER_PRIOR_COMMIT_FAILED.

diff --git a/mysql-test/suite/rpl/r/rpl_parallel_retry.result b/mysql-test/suite/rpl/r/rpl_parallel_retry.result
index c4c56489aa4..66428c94086 100644
--- a/mysql-test/suite/rpl/r/rpl_parallel_retry.result
+++ b/mysql-test/suite/rpl/r/rpl_parallel_retry.result
@@ -120,6 +120,7 @@ connection server_2;
 SET sql_log_bin=0;
 CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");
 CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");
+CALL mtr.add_suppression("Slave worker thread retried transaction .* in vain, giving up");
 SET sql_log_bin=1;
 SET @old_dbug= @@GLOBAL.debug_dbug;
 SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100";
@@ -340,4 +341,60 @@ include/start_slave.inc
 connection server_1;
 DROP TABLE t1, t2, t3, t4;
 DROP function foo;
+connection server_2;
+connection server_1;
+CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
+connection server_2;
+include/stop_slave.inc
+SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+SET @@GLOBAL.slave_parallel_threads=5;
+SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
+SET @@GLOBAL.slave_parallel_mode='aggressive';
+SET @old_lock_wait_timeout=@@GLOBAL.innodb_lock_wait_timeout;
+SET @@GLOBAL.innodb_lock_wait_timeout=2;
+SET @old_slave_transaction_retries=@@GLOBAL.slave_transaction_retries;
+SET @@GLOBAL.slave_transaction_retries=1;
+# Spoilers on the slave side causing temporary errors
+connect  spoiler_21,127.0.0.1,root,,test,$SLAVE_MYPORT;
+BEGIN;
+INSERT INTO t1 SET a=1,b=2;
+connect  spoiler_22,127.0.0.1,root,,test,$SLAVE_MYPORT;
+BEGIN;
+INSERT INTO t1 SET a=2,b=2;
+# Master payload
+connection server_1;
+SET @@SESSION.GTID_SEQ_NO=1000;
+INSERT INTO t1 SET a=1,b=1;
+SET @@SESSION.GTID_SEQ_NO=1001;
+INSERT INTO t1 SET a=2,b=1;
+# Start slave whose both appliers is destined to being blocked
+connection server_2;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET @@GLOBAL.debug_dbug="+d,rpl_parallel_simulate_wait_at_retry";
+include/start_slave.inc
+# Make sure the 2nd seqno_1001 worker has gotten to waiting
+# Signal to the 1st to proceed after it has reached termination state
+SET @@DEBUG_SYNC='now SIGNAL proceed_by_1000';
+connection spoiler_21;
+ROLLBACK;
+# Release the 2nd worker to proceed
+connection spoiler_22;
+ROLLBACK;
+connection server_2;
+SET @@DEBUG_SYNC='now SIGNAL proceed_by_1001';
+# observe how it all ends
+# Wait for the workers to go home and check the result of applying
+# which is OK
+connection server_2;
+include/stop_slave.inc
+SET @@GLOBAL.slave_parallel_threads=@old_parallel_threads;
+SET @@GLOBAL.slave_parallel_mode=@old_parallel_mode;
+SET @@GLOBAL.innodb_lock_wait_timeout=@old_lock_wait_timeout;
+SET @@GLOBAL.slave_transaction_retries=@old_slave_transaction_retries;
+SET @@GLOBAL.debug_dbug=@old_dbug;
+SET debug_sync='RESET';
+include/start_slave.inc
+connection server_1;
+DROP TABLE t1;
+connection server_2;
 include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_parallel_retry.test b/mysql-test/suite/rpl/t/rpl_parallel_retry.test
index b3a8ea45cf0..96863f9021d 100644
--- a/mysql-test/suite/rpl/t/rpl_parallel_retry.test
+++ b/mysql-test/suite/rpl/t/rpl_parallel_retry.test
@@ -128,6 +128,7 @@ SELECT * FROM t1 ORDER BY a;
 SET sql_log_bin=0;
 CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");
 CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");
+CALL mtr.add_suppression("Slave worker thread retried transaction .* in vain, giving up");
 SET sql_log_bin=1;
 
 SET @old_dbug= @@GLOBAL.debug_dbug;
@@ -371,7 +372,7 @@ SELECT * FROM t3 ORDER BY a;
 SET binlog_format=@old_format;
 
 
-# Clean up.
+# Clean up of the above part.
 --connection server_2
 --source include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=