This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 3e8832121a5faff63a54b9333e16c66979da73ed
Author: Alexandra Wang <[email protected]>
AuthorDate: Tue Jan 3 15:01:49 2023 -0800

    Revert "Fix incremental recovery failure because the checkpoint redo wal 
file before divergence LSN is gone."
    
    This reverts commit 185c7d796f9cfd431cea69c05dc06932faa0300b.
---
 src/backend/access/transam/test/xlog_test.c        |   2 +-
 src/backend/access/transam/xlog.c                  |  38 +--
 .../expected/pg_rewind_fail_missing_xlog.out       | 254 ---------------------
 src/test/isolation2/isolation2_schedule            |   1 -
 .../isolation2/sql/pg_rewind_fail_missing_xlog.sql | 119 ----------
 5 files changed, 11 insertions(+), 403 deletions(-)

diff --git a/src/backend/access/transam/test/xlog_test.c 
b/src/backend/access/transam/test/xlog_test.c
index dacebb77a6..b8cc84e09f 100644
--- a/src/backend/access/transam/test/xlog_test.c
+++ b/src/backend/access/transam/test/xlog_test.c
@@ -8,7 +8,7 @@
 static void
 KeepLogSeg_wrapper(XLogRecPtr recptr, XLogSegNo *logSegNo)
 {
-       KeepLogSeg(recptr, logSegNo, InvalidXLogRecPtr);
+       KeepLogSeg(recptr, logSegNo);
 }
 
 static void
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index d1ca208003..328681c514 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -957,7 +957,7 @@ static void LocalSetXLogInsertAllowed(void);
 static void CreateEndOfRecoveryRecord(void);
 static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn);
 static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
-static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr 
PriorRedoPtr);
+static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo);
 static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void);
 
 static void AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic);
@@ -9908,7 +9908,7 @@ CreateCheckPoint(int flags)
         * prevent the disk holding the xlog from growing full.
         */
        XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
-       KeepLogSeg(recptr, &_logSegNo, PriorRedoPtr);
+       KeepLogSeg(recptr, &_logSegNo);
        if (InvalidateObsoleteReplicationSlots(_logSegNo))
        {
                /*
@@ -9916,7 +9916,7 @@ CreateCheckPoint(int flags)
                 * horizon, starting again from RedoRecPtr.
                 */
                XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
-               KeepLogSeg(recptr, &_logSegNo, PriorRedoPtr);
+               KeepLogSeg(recptr, &_logSegNo);
        }
        _logSegNo--;
        RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr);
@@ -10320,7 +10320,7 @@ CreateRestartPoint(int flags)
        receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
        replayPtr = GetXLogReplayRecPtr(&replayTLI);
        endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
-       KeepLogSeg(endptr, &_logSegNo, InvalidXLogRecPtr);
+       KeepLogSeg(endptr, &_logSegNo);
        if (InvalidateObsoleteReplicationSlots(_logSegNo))
        {
                /*
@@ -10328,7 +10328,7 @@ CreateRestartPoint(int flags)
                 * horizon, starting again from RedoRecPtr.
                 */
                XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
-               KeepLogSeg(endptr, &_logSegNo, InvalidXLogRecPtr);
+               KeepLogSeg(endptr, &_logSegNo);
        }
        _logSegNo--;
 
@@ -10447,7 +10447,7 @@ GetWALAvailability(XLogRecPtr targetLSN)
 
        /* calculate oldest segment currently needed by slots */
        XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
-       KeepLogSeg(currpos, &oldestSlotSeg, InvalidXLogRecPtr);
+       KeepLogSeg(currpos, &oldestSlotSeg);
 
        /*
         * Find the oldest extant segment file. We get 1 until checkpoint 
removes
@@ -10508,12 +10508,11 @@ GetWALAvailability(XLogRecPtr targetLSN)
  * invalidation is optionally done here, instead.
  */
 static void
-KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr PriorRedoPtr)
+KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
 {
        XLogSegNo       currSegNo;
        XLogSegNo       segno;
        XLogRecPtr      keep;
-       static XLogRecPtr CkptRedoBeforeMinLSN = InvalidXLogRecPtr;
 
        XLByteToSeg(recptr, currSegNo, wal_segment_size);
        segno = currSegNo;
@@ -10531,34 +10530,17 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, 
XLogRecPtr PriorRedoPtr)
 
 #ifdef FAULT_INJECTOR
        /*
-        * Let the WAL still needed be removed.  This is used to test if WAL 
sender
-        * can recognize that an incremental recovery has failed when the WAL
+        * Ignore the replication slot's LSN and let the WAL still needed by the
+        * replication slot to be removed.  This is used to test if WAL sender 
can
+        * recognize that an incremental recovery has failed when the WAL
         * requested by a mirror no longer exists.
         */
        if (SIMPLE_FAULT_INJECTOR("keep_log_seg") == FaultInjectorTypeSkip)
-       {
                keep = GetXLogWriteRecPtr();
-               XLByteToSeg(keep, *logSegNo, wal_segment_size);
-       }
 #endif
 
        if (keep != InvalidXLogRecPtr)
        {
-               /*
-                * GPDB never uses restart_lsn as lowest cut-off point. Instead 
always
-                * will use Checkpoint redo location prior to restart_lsn as 
cut-off
-                * point.
-                */
-               if (!XLogRecPtrIsInvalid(PriorRedoPtr))
-               {
-                       if (PriorRedoPtr < keep)
-                       {
-                               keep = PriorRedoPtr;
-                               CkptRedoBeforeMinLSN = PriorRedoPtr;
-                       }
-                       else if (!XLogRecPtrIsInvalid(CkptRedoBeforeMinLSN))
-                               keep = CkptRedoBeforeMinLSN;
-               }
                XLByteToSeg(keep, segno, wal_segment_size);
 
                /* Cap by max_slot_wal_keep_size ... */
diff --git a/src/test/isolation2/expected/pg_rewind_fail_missing_xlog.out 
b/src/test/isolation2/expected/pg_rewind_fail_missing_xlog.out
deleted file mode 100644
index 105ea1de92..0000000000
--- a/src/test/isolation2/expected/pg_rewind_fail_missing_xlog.out
+++ /dev/null
@@ -1,254 +0,0 @@
--- Test the bug that if checkpoint.redo before the oldest replication slot LSN
--- is removed/recylced in checkpointer, gprecoverseg (based on pg_rewind) would
--- would fail.
-
-CREATE TABLE tst_missing_tbl (a int);
-CREATE
-INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-
--- make the test faster.
-!\retcode gpconfig -c wal_keep_size -v 128;
-(exited with code 0)
-!\retcode gpstop -ari;
-(exited with code 0)
-
--- Test 1: primary was marked down by the master but acetually it keeps running
--- and previously, checkpoints could recycle/remove the checkpoint.redo wal
--- file before the oldest replication slot LSN and thus make pg_rewind fail due
--- to missing xlog file.
-
--- Run a checkpoint so that the below sqls won't cause a checkpoint
--- until an explicit checkpoint command is issued by the test.
--- checkpoint_timeout is by default 300 but the below test should be able to
--- finish in 300 seconds.
-1: CHECKPOINT;
-CHECKPOINT
-
-0U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
- ?column? 
-----------
- t        
-(1 row)
-1: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-0U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
- ?column? 
-----------
- t        
-(1 row)
-1: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-0U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
- ?column? 
-----------
- t        
-(1 row)
-1: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
--- Should be not needed mostly but let's 100% ensure since pg_switch_wal()
--- won't switch if it has been on the boundary (seldom though).
-0U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
- ?column? 
-----------
- t        
-(1 row)
-1: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-
--- Mark down the primary with content 0 via fts fault injection.
-1: SELECT gp_inject_fault_infinite('fts_handle_message', 'error', dbid) FROM 
gp_segment_configuration WHERE content = 0 AND role = 'p';
- gp_inject_fault_infinite 
---------------------------
- Success:                 
-(1 row)
-
--- Trigger failover and double check.
-1: SELECT gp_request_fts_probe_scan();
- gp_request_fts_probe_scan 
----------------------------
- t                         
-(1 row)
-1: SELECT role, preferred_role from gp_segment_configuration where content = 0;
- role | preferred_role 
-------+----------------
- m    | p              
- p    | m              
-(2 rows)
-
--- Run two more checkpoints. Previously this causes the checkpoint.redo wal
--- file before the oldest replication slot LSN is recycled/removed.
-0M: CHECKPOINT;
-CHECKPOINT
-0M: CHECKPOINT;
-CHECKPOINT
-
--- Wait some seconds until the promotion is done. When the query comes too 
early,
--- the promoted primary is still hot-standby, but we don't support hot-standby 
now.
-2: select pg_sleep(2);
- pg_sleep 
-----------
-          
-(1 row)
-
--- Write something (promote adds a 'End Of Recovery' xlog that causes the
--- divergence between primary and mirror, but I add a write here so that we
--- know that a wal divergence is explicitly triggered and 100% completed.  Also
--- sanity check the tuple distribution (assumption of the test).
-2: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-2: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id;
- gp_segment_id | count 
----------------+-------
- 0             | 6     
- 1             | 6     
- 2             | 6     
-(3 rows)
-
--- Ensure that pg_rewind succeeds. Previously it could fail since the 
divergence
--- LSN wal file is missing.
-!\retcode gprecoverseg -av;
-(exited with code 0)
--- In case it fails it should not affect subsequent testing.
-!\retcode gprecoverseg -aF;
-(exited with code 0)
-2: SELECT wait_until_all_segments_synchronized();
- wait_until_all_segments_synchronized 
---------------------------------------
- OK                                   
-(1 row)
-
--- Test 2
--- primary is abnormally shutdown, but pg_rewind would call single mode
--- postgres to ensure it clean shutdown and that causes two checkpoints.
-
--- See previous comment for why.
-3: CHECKPOINT;
-CHECKPOINT
-
-1U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
- ?column? 
-----------
- t        
-(1 row)
-3: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-1U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
- ?column? 
-----------
- t        
-(1 row)
-3: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-1U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
- ?column? 
-----------
- t        
-(1 row)
-3: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
--- Should be not needed mostly but let's 100% ensure since pg_switch_wal()
--- won't switch if it is on the boundary already (seldom though).
-1U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
- ?column? 
-----------
- t        
-(1 row)
-3: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-
--- Hang at checkpointer before writing checkpoint xlog.
-3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'suspend', dbid) 
FROM gp_segment_configuration WHERE role='p' AND content = 1;
- gp_inject_fault 
------------------
- Success:        
-(1 row)
-1U&: CHECKPOINT;  <waiting ...>
-3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, 
dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1;
- gp_wait_until_triggered_fault 
--------------------------------
- Success:                      
-(1 row)
-
--- Stop the primary immediately and promote the mirror.
-3: SELECT pg_ctl(datadir, 'stop', 'immediate') FROM gp_segment_configuration 
WHERE role='p' AND content = 1;
- pg_ctl 
---------
- OK     
-(1 row)
-3: SELECT gp_request_fts_probe_scan();
- gp_request_fts_probe_scan 
----------------------------
- t                         
-(1 row)
--- Wait for the end of recovery CHECKPOINT completed after the mirror was 
promoted
-3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'skip', dbid) 
FROM gp_segment_configuration WHERE role='p' AND content = 1;
- gp_inject_fault 
------------------
- Success:        
-(1 row)
-3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, 
dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1;
- gp_wait_until_triggered_fault 
--------------------------------
- Success:                      
-(1 row)
-3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'reset', dbid) 
FROM gp_segment_configuration WHERE role = 'p' AND content = 1;
- gp_inject_fault 
------------------
- Success:        
-(1 row)
-3: SELECT role, preferred_role from gp_segment_configuration where content = 1;
- role | preferred_role 
-------+----------------
- m    | p              
- p    | m              
-(2 rows)
-
-4: INSERT INTO tst_missing_tbl values(2),(1),(5);
-INSERT 3
-4: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id;
- gp_segment_id | count 
----------------+-------
- 1             | 11    
- 0             | 11    
- 2             | 11    
-(3 rows)
-
--- CHECKPOINT should fail now.
-1U<:  <... completed>
-server closed the connection unexpectedly
-       This probably means the server terminated abnormally
-       before or while processing the request.
-1Uq: ... <quitting>
-
--- Ensure that pg_rewind succeeds. For unclean shutdown, there are two
--- checkpoints are introduced in pg_rewind when running single-mode postgres
--- (one is the checkpoint after crash recovery and another is the shutdown
--- checkpoint) and previously the checkpoints clean up the wal files that
--- include the previous checkpoint (before divergence LSN) for pg_rewind and
--- thus makes gprecoverseg (pg_rewind) fail.
-!\retcode gprecoverseg -av;
-(exited with code 0)
--- In case it fails it should not affect subsequent testing.
-!\retcode gprecoverseg -aF;
-(exited with code 0)
-4: SELECT wait_until_all_segments_synchronized();
- wait_until_all_segments_synchronized 
---------------------------------------
- OK                                   
-(1 row)
-
--- Cleanup
-5: DROP TABLE tst_missing_tbl;
-DROP
-!\retcode gprecoverseg -ar;
-(exited with code 0)
-5: SELECT wait_until_all_segments_synchronized();
- wait_until_all_segments_synchronized 
---------------------------------------
- OK                                   
-(1 row)
-!\retcode gpconfig -r wal_keep_size;
-(exited with code 0)
-!\retcode gpstop -ari;
-(exited with code 0)
diff --git a/src/test/isolation2/isolation2_schedule 
b/src/test/isolation2/isolation2_schedule
index 7a2c8bc317..c5fa132ef0 100644
--- a/src/test/isolation2/isolation2_schedule
+++ b/src/test/isolation2/isolation2_schedule
@@ -8,7 +8,6 @@ test: checkpoint_dtx_info
 test: autovacuum-analyze
 test: vacuum_skip_locked_onseg
 test: lockmodes
-# test: pg_rewind_fail_missing_xlog
 test: prepared_xact_deadlock_pg_rewind
 test: ao_partition_lock
 test: concurrent_partition_table_operations_should_not_deadlock
diff --git a/src/test/isolation2/sql/pg_rewind_fail_missing_xlog.sql 
b/src/test/isolation2/sql/pg_rewind_fail_missing_xlog.sql
deleted file mode 100644
index b0af3910f6..0000000000
--- a/src/test/isolation2/sql/pg_rewind_fail_missing_xlog.sql
+++ /dev/null
@@ -1,119 +0,0 @@
--- Test the bug that if checkpoint.redo before the oldest replication slot LSN
--- is removed/recylced in checkpointer, gprecoverseg (based on pg_rewind) would
--- would fail.
-
-CREATE TABLE tst_missing_tbl (a int);
-INSERT INTO tst_missing_tbl values(2),(1),(5);
-
--- make the test faster.
-!\retcode gpconfig -c wal_keep_size -v 128;
-!\retcode gpstop -ari;
-
--- Test 1: primary was marked down by the master but acetually it keeps running
--- and previously, checkpoints could recycle/remove the checkpoint.redo wal
--- file before the oldest replication slot LSN and thus make pg_rewind fail due
--- to missing xlog file.
-
--- Run a checkpoint so that the below sqls won't cause a checkpoint
--- until an explicit checkpoint command is issued by the test.
--- checkpoint_timeout is by default 300 but the below test should be able to
--- finish in 300 seconds.
-1: CHECKPOINT;
-
-0U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
-1: INSERT INTO tst_missing_tbl values(2),(1),(5);
-0U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
-1: INSERT INTO tst_missing_tbl values(2),(1),(5);
-0U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
-1: INSERT INTO tst_missing_tbl values(2),(1),(5);
--- Should be not needed mostly but let's 100% ensure since pg_switch_wal()
--- won't switch if it has been on the boundary (seldom though).
-0U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
-1: INSERT INTO tst_missing_tbl values(2),(1),(5);
-
--- Mark down the primary with content 0 via fts fault injection.
-1: SELECT gp_inject_fault_infinite('fts_handle_message', 'error', dbid) FROM 
gp_segment_configuration WHERE content = 0 AND role = 'p';
-
--- Trigger failover and double check.
-1: SELECT gp_request_fts_probe_scan();
-1: SELECT role, preferred_role from gp_segment_configuration where content = 0;
-
--- Run two more checkpoints. Previously this causes the checkpoint.redo wal
--- file before the oldest replication slot LSN is recycled/removed.
-0M: CHECKPOINT;
-0M: CHECKPOINT;
-
--- Wait some seconds until the promotion is done. When the query comes too 
early,
--- the promoted primary is still hot-standby, but we don't support hot-standby 
now.
-2: select pg_sleep(2);
-
--- Write something (promote adds a 'End Of Recovery' xlog that causes the
--- divergence between primary and mirror, but I add a write here so that we
--- know that a wal divergence is explicitly triggered and 100% completed.  Also
--- sanity check the tuple distribution (assumption of the test).
-2: INSERT INTO tst_missing_tbl values(2),(1),(5);
-2: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id;
-
--- Ensure that pg_rewind succeeds. Previously it could fail since the 
divergence
--- LSN wal file is missing.
-!\retcode gprecoverseg -av;
--- In case it fails it should not affect subsequent testing.
-!\retcode gprecoverseg -aF;
-2: SELECT wait_until_all_segments_synchronized();
-
--- Test 2
--- primary is abnormally shutdown, but pg_rewind would call single mode
--- postgres to ensure it clean shutdown and that causes two checkpoints.
-
--- See previous comment for why.
-3: CHECKPOINT;
-
-1U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
-3: INSERT INTO tst_missing_tbl values(2),(1),(5);
-1U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
-3: INSERT INTO tst_missing_tbl values(2),(1),(5);
-1U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
-3: INSERT INTO tst_missing_tbl values(2),(1),(5);
--- Should be not needed mostly but let's 100% ensure since pg_switch_wal()
--- won't switch if it is on the boundary already (seldom though).
-1U: SELECT pg_switch_wal is not null FROM pg_switch_wal();
-3: INSERT INTO tst_missing_tbl values(2),(1),(5);
-
--- Hang at checkpointer before writing checkpoint xlog.
-3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'suspend', dbid) 
FROM gp_segment_configuration WHERE role='p' AND content = 1;
-1U&: CHECKPOINT;
-3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, 
dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1;
-
--- Stop the primary immediately and promote the mirror.
-3: SELECT pg_ctl(datadir, 'stop', 'immediate') FROM gp_segment_configuration 
WHERE role='p' AND content = 1;
-3: SELECT gp_request_fts_probe_scan();
--- Wait for the end of recovery CHECKPOINT completed after the mirror was 
promoted
-3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'skip', dbid) 
FROM gp_segment_configuration WHERE role='p' AND content = 1;
-3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, 
dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1;
-3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'reset', dbid) 
FROM gp_segment_configuration WHERE role = 'p' AND content = 1;
-3: SELECT role, preferred_role from gp_segment_configuration where content = 1;
-
-4: INSERT INTO tst_missing_tbl values(2),(1),(5);
-4: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id;
-
--- CHECKPOINT should fail now.
-1U<:
-1Uq:
-
--- Ensure that pg_rewind succeeds. For unclean shutdown, there are two
--- checkpoints are introduced in pg_rewind when running single-mode postgres
--- (one is the checkpoint after crash recovery and another is the shutdown
--- checkpoint) and previously the checkpoints clean up the wal files that
--- include the previous checkpoint (before divergence LSN) for pg_rewind and
--- thus makes gprecoverseg (pg_rewind) fail.
-!\retcode gprecoverseg -av;
--- In case it fails it should not affect subsequent testing.
-!\retcode gprecoverseg -aF;
-4: SELECT wait_until_all_segments_synchronized();
-
--- Cleanup
-5: DROP TABLE tst_missing_tbl;
-!\retcode gprecoverseg -ar;
-5: SELECT wait_until_all_segments_synchronized();
-!\retcode gpconfig -r wal_keep_size;
-!\retcode gpstop -ari;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to