This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 36988b60f44933b0c0fc8218a4a2f3d8fc52f848 Author: Huansong Fu <[email protected]> AuthorDate: Thu Jul 7 10:02:34 2022 -0700 Fix a flaky test idle_in_transaction_session_timeout The test gets stuck on ic-tcp because when session 2 does DECLARE CURSOR the QD needs to set up interconnect with all QEs but one of the QEs is blocked on a 'suspend' injected fault and couldn't ack. Note that this isn't an issue for ic-proxy even though they share the same code path (SetupTCPInterconnect()) because the acks will be handled by the proxy processes instead of the backends. Fix this by using 'sleep' fault injection instead. That also makes it unnecessary to use multiple sessions. Move the test to regress then. --- ...te_gang_idle_in_transaction_session_timeout.out | 65 ---------------------- src/test/isolation2/isolation2_schedule | 1 - ...te_gang_idle_in_transaction_session_timeout.sql | 32 ----------- ...te_gang_idle_in_transaction_session_timeout.out | 49 ++++++++++++++++ src/test/regress/greenplum_schedule | 1 + ...te_gang_idle_in_transaction_session_timeout.sql | 28 ++++++++++ 6 files changed, 78 insertions(+), 98 deletions(-) diff --git a/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out b/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out deleted file mode 100644 index 694c253fab..0000000000 --- a/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out +++ /dev/null @@ -1,65 +0,0 @@ --- GUC idle_in_transaction_session_timeout MUST not take effect on QE, --- this test guard that. --- In this test, session 2 uses a cursor, which will spawn a write gang --- and a read gang. And we set idle_in_transaction_session_timeout --- to 1s, when FETCH is executed, the read gang will suspend 1.5s because --- of the fault injection. However, without the fix, the write gang will be --- terminated 1s later when FETCH is issued due to the timeout of --- idle_in_transaction_session_timeout.
So when the reader is going to read the --- shared snapshot, ERROR will be raised. - -1: CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a); -CREATE -1: INSERT INTO t_idle_trx_timeout VALUES (2),(3); -INSERT 2 -1: SELECT gp_segment_id, * FROM t_idle_trx_timeout; - gp_segment_id | a ----------------+--- - 0 | 2 - 0 | 3 -(2 rows) - -1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; - gp_inject_fault_infinite --------------------------- - Success: -(1 row) -2: SET idle_in_transaction_session_timeout = 1000; -SET -1&: SELECT gp_wait_until_triggered_fault('before_read_shared_snapshot_for_cursor', 1, dbid) FROM gp_segment_configuration where content =0 AND role = 'p'; <waiting ...> -2: BEGIN; -BEGIN -2: DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout; -DECLARE -2&: FETCH cur; <waiting ...> -1<: <... completed> - gp_wait_until_triggered_fault -------------------------------- - Success: -(1 row) -1: SELECT pg_sleep(1.5); - pg_sleep ----------- - -(1 row) -1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 'reset', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; - gp_inject_fault_infinite --------------------------- - Success: -(1 row) -2<: <... 
completed> - a ---- - 2 -(1 row) -2: FETCH cur; - a ---- - 3 -(1 row) -2: END; -END - -1: DROP TABLE t_idle_trx_timeout; -DROP - diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index a20ee0b513..7a2c8bc317 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -225,7 +225,6 @@ test: segwalrep/die_commit_pending_replication test: enable_autovacuum test: idle_gang_cleaner # test idle_in_transaction_session_timeout -test: write_gang_idle_in_transaction_session_timeout # Tests for FTS test: fts_errors diff --git a/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql b/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql deleted file mode 100644 index 666fc13b90..0000000000 --- a/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql +++ /dev/null @@ -1,32 +0,0 @@ --- GUC idle_in_transaction_session_timeout MUST not take effect on QE, --- this test guard that. --- In this test, session 2 uses a cursor, which will spawn a write gang --- and a read gang. And we set idle_in_transaction_session_timeout --- to 1s, when FETCH is executed, the read gang will suspend 1.5s because --- of the fault injection. However, without the fix, the write gang will be --- terminated 1s later when FETCH is issued due to the timeout of --- idle_in_transaction_session_timeout. So when the reader is going to read the --- shared snapshot, ERROR will be raised. 
- -1: CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a); -1: INSERT INTO t_idle_trx_timeout VALUES (2),(3); -1: SELECT gp_segment_id, * FROM t_idle_trx_timeout; - -1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 'suspend', dbid) - FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; -2: SET idle_in_transaction_session_timeout = 1000; -1&: SELECT gp_wait_until_triggered_fault('before_read_shared_snapshot_for_cursor', 1, dbid) - FROM gp_segment_configuration where content =0 AND role = 'p'; -2: BEGIN; -2: DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout; -2&: FETCH cur; -1<: -1: SELECT pg_sleep(1.5); -1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 'reset', dbid) - FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; -2<: -2: FETCH cur; -2: END; - -1: DROP TABLE t_idle_trx_timeout; - diff --git a/src/test/regress/expected/write_gang_idle_in_transaction_session_timeout.out b/src/test/regress/expected/write_gang_idle_in_transaction_session_timeout.out new file mode 100644 index 0000000000..cf55e34da6 --- /dev/null +++ b/src/test/regress/expected/write_gang_idle_in_transaction_session_timeout.out @@ -0,0 +1,49 @@ +-- GUC idle_in_transaction_session_timeout MUST not take effect on QE, +-- this test guard that. +-- In this test, DECLARE cursor will spawn a write gang +-- and a read gang. And we set idle_in_transaction_session_timeout +-- to 1s, when FETCH is executed, the read gang will sleep 2s because +-- of the fault injection. However, without the fix, the write gang will be +-- terminated 1s later when FETCH is issued due to the timeout of +-- idle_in_transaction_session_timeout. So when the reader is going to read the +-- shared snapshot, ERROR will be raised. 
+CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a); +INSERT INTO t_idle_trx_timeout VALUES (2),(3); +SELECT gp_segment_id, * FROM t_idle_trx_timeout; + gp_segment_id | a +---------------+--- + 0 | 2 + 0 | 3 +(2 rows) + +SET idle_in_transaction_session_timeout = 1000; +SELECT gp_inject_fault('before_read_shared_snapshot_for_cursor', 'sleep', '', '', '', 1, 1, 2, dbid) + FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; + gp_inject_fault +----------------- + Success: +(1 row) + +BEGIN; +DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout; +FETCH cur; + a +--- + 2 +(1 row) + +FETCH cur; + a +--- + 3 +(1 row) + +END; +SELECT gp_inject_fault('before_read_shared_snapshot_for_cursor', 'reset', dbid) + FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; + gp_inject_fault +----------------- + Success: +(1 row) + +DROP TABLE t_idle_trx_timeout; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 418f14a783..763b5952c9 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -58,6 +58,7 @@ test: guc_gp test: toast test: misc_jiras test: statement_mem_for_windowagg +test: write_gang_idle_in_transaction_session_timeout # namespace_gp test will show diff if concurrent tests use temporary tables. # So run it separately. diff --git a/src/test/regress/sql/write_gang_idle_in_transaction_session_timeout.sql b/src/test/regress/sql/write_gang_idle_in_transaction_session_timeout.sql new file mode 100644 index 0000000000..859f36a28e --- /dev/null +++ b/src/test/regress/sql/write_gang_idle_in_transaction_session_timeout.sql @@ -0,0 +1,28 @@ +-- GUC idle_in_transaction_session_timeout MUST not take effect on QE, +-- this test guard that. +-- In this test, DECLARE cursor will spawn a write gang +-- and a read gang. And we set idle_in_transaction_session_timeout +-- to 1s, when FETCH is executed, the read gang will sleep 2s because +-- of the fault injection. 
However, without the fix, the write gang will be +-- terminated 1s later when FETCH is issued due to the timeout of +-- idle_in_transaction_session_timeout. So when the reader is going to read the +-- shared snapshot, ERROR will be raised. + +CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a); +INSERT INTO t_idle_trx_timeout VALUES (2),(3); +SELECT gp_segment_id, * FROM t_idle_trx_timeout; + +SET idle_in_transaction_session_timeout = 1000; +SELECT gp_inject_fault('before_read_shared_snapshot_for_cursor', 'sleep', '', '', '', 1, 1, 2, dbid) + FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; +BEGIN; +DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout; +FETCH cur; +FETCH cur; +END; + +SELECT gp_inject_fault('before_read_shared_snapshot_for_cursor', 'reset', dbid) + FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; + +DROP TABLE t_idle_trx_timeout; + --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
