This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 36988b60f44933b0c0fc8218a4a2f3d8fc52f848
Author: Huansong Fu <[email protected]>
AuthorDate: Thu Jul 7 10:02:34 2022 -0700

    Fix a flaky test idle_in_transaction_session_timeout
    
    The test gets stuck on ic-tcp because when session 2 does DECLARE
    CURSOR, the QD needs to set up the interconnect with all QEs, but one
    of the QEs is blocked on an injected 'suspend' fault and cannot ack.
    Note that this isn't an issue for ic-proxy, even though both share the
    same code path (SetupTCPInterconnect()), because the acks are handled
    by the proxy processes instead of the backends.
    
    Fix this by using 'sleep' fault injection instead. That also makes
    it unnecessary to use multiple sessions, so move the test from
    isolation2 to regress.
---
 ...te_gang_idle_in_transaction_session_timeout.out | 65 ----------------------
 src/test/isolation2/isolation2_schedule            |  1 -
 ...te_gang_idle_in_transaction_session_timeout.sql | 32 -----------
 ...te_gang_idle_in_transaction_session_timeout.out | 49 ++++++++++++++++
 src/test/regress/greenplum_schedule                |  1 +
 ...te_gang_idle_in_transaction_session_timeout.sql | 28 ++++++++++
 6 files changed, 78 insertions(+), 98 deletions(-)

diff --git a/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out b/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out
deleted file mode 100644
index 694c253fab..0000000000
--- a/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out
+++ /dev/null
@@ -1,65 +0,0 @@
--- GUC idle_in_transaction_session_timeout MUST not take effect on QE,
--- this test guard that.
--- In this test, session 2 uses a cursor, which will spawn a write gang
--- and a read gang. And we set idle_in_transaction_session_timeout
--- to 1s, when FETCH is executed, the read gang will suspend 1.5s because
--- of the fault injection. However, without the fix, the write gang will be
--- terminated 1s later when FETCH is issued due to the timeout of
--- idle_in_transaction_session_timeout. So when the reader is going to read the
--- shared snapshot, ERROR will be raised.
-
-1: CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a);
-CREATE
-1: INSERT INTO t_idle_trx_timeout VALUES (2),(3);
-INSERT 2
-1: SELECT gp_segment_id, * FROM t_idle_trx_timeout;
- gp_segment_id | a 
----------------+---
- 0             | 2 
- 0             | 3 
-(2 rows)
-
-1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 
'suspend', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
- gp_inject_fault_infinite 
---------------------------
- Success:                 
-(1 row)
-2: SET idle_in_transaction_session_timeout = 1000;
-SET
-1&: SELECT 
gp_wait_until_triggered_fault('before_read_shared_snapshot_for_cursor', 1, 
dbid) FROM gp_segment_configuration where content =0 AND role = 'p';  <waiting 
...>
-2: BEGIN;
-BEGIN
-2: DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout;
-DECLARE
-2&: FETCH cur;  <waiting ...>
-1<:  <... completed>
- gp_wait_until_triggered_fault 
--------------------------------
- Success:                      
-(1 row)
-1: SELECT pg_sleep(1.5);
- pg_sleep 
-----------
-          
-(1 row)
-1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 
'reset', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
- gp_inject_fault_infinite 
---------------------------
- Success:                 
-(1 row)
-2<:  <... completed>
- a 
----
- 2 
-(1 row)
-2: FETCH cur;
- a 
----
- 3 
-(1 row)
-2: END;
-END
-
-1: DROP TABLE t_idle_trx_timeout;
-DROP
-
diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule
index a20ee0b513..7a2c8bc317 100644
--- a/src/test/isolation2/isolation2_schedule
+++ b/src/test/isolation2/isolation2_schedule
@@ -225,7 +225,6 @@ test: segwalrep/die_commit_pending_replication
 test: enable_autovacuum
 test: idle_gang_cleaner
 # test idle_in_transaction_session_timeout
-test: write_gang_idle_in_transaction_session_timeout
 
 # Tests for FTS
 test: fts_errors
diff --git a/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql b/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql
deleted file mode 100644
index 666fc13b90..0000000000
--- a/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql
+++ /dev/null
@@ -1,32 +0,0 @@
--- GUC idle_in_transaction_session_timeout MUST not take effect on QE,
--- this test guard that.
--- In this test, session 2 uses a cursor, which will spawn a write gang
--- and a read gang. And we set idle_in_transaction_session_timeout
--- to 1s, when FETCH is executed, the read gang will suspend 1.5s because
--- of the fault injection. However, without the fix, the write gang will be
--- terminated 1s later when FETCH is issued due to the timeout of
--- idle_in_transaction_session_timeout. So when the reader is going to read the
--- shared snapshot, ERROR will be raised.
-
-1: CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a);
-1: INSERT INTO t_idle_trx_timeout VALUES (2),(3);
-1: SELECT gp_segment_id, * FROM t_idle_trx_timeout;
-
-1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 
'suspend', dbid)
-    FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
-2: SET idle_in_transaction_session_timeout = 1000;
-1&: SELECT 
gp_wait_until_triggered_fault('before_read_shared_snapshot_for_cursor', 1, dbid)
-     FROM gp_segment_configuration where content =0 AND role = 'p';
-2: BEGIN;
-2: DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout;
-2&: FETCH cur;
-1<:
-1: SELECT pg_sleep(1.5);
-1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 
'reset', dbid) 
-    FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
-2<:
-2: FETCH cur;
-2: END;
-
-1: DROP TABLE t_idle_trx_timeout;
-
diff --git a/src/test/regress/expected/write_gang_idle_in_transaction_session_timeout.out b/src/test/regress/expected/write_gang_idle_in_transaction_session_timeout.out
new file mode 100644
index 0000000000..cf55e34da6
--- /dev/null
+++ b/src/test/regress/expected/write_gang_idle_in_transaction_session_timeout.out
@@ -0,0 +1,49 @@
+-- GUC idle_in_transaction_session_timeout MUST not take effect on QE,
+-- this test guard that.
+-- In this test, DECLARE cursor will spawn a write gang
+-- and a read gang. And we set idle_in_transaction_session_timeout
+-- to 1s, when FETCH is executed, the read gang will sleep 2s because
+-- of the fault injection. However, without the fix, the write gang will be
+-- terminated 1s later when FETCH is issued due to the timeout of
+-- idle_in_transaction_session_timeout. So when the reader is going to read the
+-- shared snapshot, ERROR will be raised.
+CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a);
+INSERT INTO t_idle_trx_timeout VALUES (2),(3);
+SELECT gp_segment_id, * FROM t_idle_trx_timeout;
+ gp_segment_id | a 
+---------------+---
+             0 | 2
+             0 | 3
+(2 rows)
+
+SET idle_in_transaction_session_timeout = 1000;
+SELECT gp_inject_fault('before_read_shared_snapshot_for_cursor', 'sleep', '', '', '', 1, 1, 2, dbid)
+ FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
+ gp_inject_fault 
+-----------------
+ Success:
+(1 row)
+
+BEGIN;
+DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout;
+FETCH cur;
+ a 
+---
+ 2
+(1 row)
+
+FETCH cur;
+ a 
+---
+ 3
+(1 row)
+
+END;
+SELECT gp_inject_fault('before_read_shared_snapshot_for_cursor', 'reset', dbid) 
+ FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
+ gp_inject_fault 
+-----------------
+ Success:
+(1 row)
+
+DROP TABLE t_idle_trx_timeout;
diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule
index 418f14a783..763b5952c9 100755
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -58,6 +58,7 @@ test: guc_gp
 test: toast
 test: misc_jiras
 test: statement_mem_for_windowagg
+test: write_gang_idle_in_transaction_session_timeout
 
 # namespace_gp test will show diff if concurrent tests use temporary tables.
 # So run it separately.
diff --git a/src/test/regress/sql/write_gang_idle_in_transaction_session_timeout.sql b/src/test/regress/sql/write_gang_idle_in_transaction_session_timeout.sql
new file mode 100644
index 0000000000..859f36a28e
--- /dev/null
+++ b/src/test/regress/sql/write_gang_idle_in_transaction_session_timeout.sql
@@ -0,0 +1,28 @@
+-- GUC idle_in_transaction_session_timeout MUST not take effect on QE,
+-- this test guard that.
+-- In this test, DECLARE cursor will spawn a write gang
+-- and a read gang. And we set idle_in_transaction_session_timeout
+-- to 1s, when FETCH is executed, the read gang will sleep 2s because
+-- of the fault injection. However, without the fix, the write gang will be
+-- terminated 1s later when FETCH is issued due to the timeout of
+-- idle_in_transaction_session_timeout. So when the reader is going to read the
+-- shared snapshot, ERROR will be raised.
+
+CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a);
+INSERT INTO t_idle_trx_timeout VALUES (2),(3);
+SELECT gp_segment_id, * FROM t_idle_trx_timeout;
+
+SET idle_in_transaction_session_timeout = 1000;
+SELECT gp_inject_fault('before_read_shared_snapshot_for_cursor', 'sleep', '', '', '', 1, 1, 2, dbid)
+ FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
+BEGIN;
+DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout;
+FETCH cur;
+FETCH cur;
+END;
+
+SELECT gp_inject_fault('before_read_shared_snapshot_for_cursor', 'reset', dbid) 
+ FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
+
+DROP TABLE t_idle_trx_timeout;
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to