This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 3e8832121a5faff63a54b9333e16c66979da73ed Author: Alexandra Wang <[email protected]> AuthorDate: Tue Jan 3 15:01:49 2023 -0800 Revert "Fix incremental recovery failure because the checkpoint redo wal file before divergence LSN is gone." This reverts commit 185c7d796f9cfd431cea69c05dc06932faa0300b. --- src/backend/access/transam/test/xlog_test.c | 2 +- src/backend/access/transam/xlog.c | 38 +-- .../expected/pg_rewind_fail_missing_xlog.out | 254 --------------------- src/test/isolation2/isolation2_schedule | 1 - .../isolation2/sql/pg_rewind_fail_missing_xlog.sql | 119 ---------- 5 files changed, 11 insertions(+), 403 deletions(-) diff --git a/src/backend/access/transam/test/xlog_test.c b/src/backend/access/transam/test/xlog_test.c index dacebb77a6..b8cc84e09f 100644 --- a/src/backend/access/transam/test/xlog_test.c +++ b/src/backend/access/transam/test/xlog_test.c @@ -8,7 +8,7 @@ static void KeepLogSeg_wrapper(XLogRecPtr recptr, XLogSegNo *logSegNo) { - KeepLogSeg(recptr, logSegNo, InvalidXLogRecPtr); + KeepLogSeg(recptr, logSegNo); } static void diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index d1ca208003..328681c514 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -957,7 +957,7 @@ static void LocalSetXLogInsertAllowed(void); static void CreateEndOfRecoveryRecord(void); static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn); static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags); -static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr PriorRedoPtr); +static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo); static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void); static void AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic); @@ -9908,7 +9908,7 @@ CreateCheckPoint(int flags) * prevent the disk holding the xlog from growing full. */ XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); - KeepLogSeg(recptr, &_logSegNo, PriorRedoPtr); + KeepLogSeg(recptr, &_logSegNo); if (InvalidateObsoleteReplicationSlots(_logSegNo)) { /* @@ -9916,7 +9916,7 @@ CreateCheckPoint(int flags) * horizon, starting again from RedoRecPtr. */ XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); - KeepLogSeg(recptr, &_logSegNo, PriorRedoPtr); + KeepLogSeg(recptr, &_logSegNo); } _logSegNo--; RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr); @@ -10320,7 +10320,7 @@ CreateRestartPoint(int flags) receivePtr = GetWalRcvFlushRecPtr(NULL, NULL); replayPtr = GetXLogReplayRecPtr(&replayTLI); endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr; - KeepLogSeg(endptr, &_logSegNo, InvalidXLogRecPtr); + KeepLogSeg(endptr, &_logSegNo); if (InvalidateObsoleteReplicationSlots(_logSegNo)) { /* @@ -10328,7 +10328,7 @@ CreateRestartPoint(int flags) * horizon, starting again from RedoRecPtr. */ XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); - KeepLogSeg(endptr, &_logSegNo, InvalidXLogRecPtr); + KeepLogSeg(endptr, &_logSegNo); } _logSegNo--; @@ -10447,7 +10447,7 @@ GetWALAvailability(XLogRecPtr targetLSN) /* calculate oldest segment currently needed by slots */ XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size); - KeepLogSeg(currpos, &oldestSlotSeg, InvalidXLogRecPtr); + KeepLogSeg(currpos, &oldestSlotSeg); /* * Find the oldest extant segment file. We get 1 until checkpoint removes @@ -10508,12 +10508,11 @@ GetWALAvailability(XLogRecPtr targetLSN) * invalidation is optionally done here, instead. */ static void -KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr PriorRedoPtr) +KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) { XLogSegNo currSegNo; XLogSegNo segno; XLogRecPtr keep; - static XLogRecPtr CkptRedoBeforeMinLSN = InvalidXLogRecPtr; XLByteToSeg(recptr, currSegNo, wal_segment_size); segno = currSegNo; @@ -10531,34 +10530,17 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo, XLogRecPtr PriorRedoPtr) #ifdef FAULT_INJECTOR /* - * Let the WAL still needed be removed. This is used to test if WAL sender - * can recognize that an incremental recovery has failed when the WAL + * Ignore the replication slot's LSN and let the WAL still needed by the + * replication slot to be removed. This is used to test if WAL sender can + * recognize that an incremental recovery has failed when the WAL * requested by a mirror no longer exists. */ if (SIMPLE_FAULT_INJECTOR("keep_log_seg") == FaultInjectorTypeSkip) - { keep = GetXLogWriteRecPtr(); - XLByteToSeg(keep, *logSegNo, wal_segment_size); - } #endif if (keep != InvalidXLogRecPtr) { - /* - * GPDB never uses restart_lsn as lowest cut-off point. Instead always - * will use Checkpoint redo location prior to restart_lsn as cut-off - * point. - */ - if (!XLogRecPtrIsInvalid(PriorRedoPtr)) - { - if (PriorRedoPtr < keep) - { - keep = PriorRedoPtr; - CkptRedoBeforeMinLSN = PriorRedoPtr; - } - else if (!XLogRecPtrIsInvalid(CkptRedoBeforeMinLSN)) - keep = CkptRedoBeforeMinLSN; - } XLByteToSeg(keep, segno, wal_segment_size); /* Cap by max_slot_wal_keep_size ... */ diff --git a/src/test/isolation2/expected/pg_rewind_fail_missing_xlog.out b/src/test/isolation2/expected/pg_rewind_fail_missing_xlog.out deleted file mode 100644 index 105ea1de92..0000000000 --- a/src/test/isolation2/expected/pg_rewind_fail_missing_xlog.out +++ /dev/null @@ -1,254 +0,0 @@ --- Test the bug that if checkpoint.redo before the oldest replication slot LSN --- is removed/recylced in checkpointer, gprecoverseg (based on pg_rewind) would --- would fail. - -CREATE TABLE tst_missing_tbl (a int); -CREATE -INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 - --- make the test faster. -!\retcode gpconfig -c wal_keep_size -v 128; -(exited with code 0) -!\retcode gpstop -ari; -(exited with code 0) - --- Test 1: primary was marked down by the master but acetually it keeps running --- and previously, checkpoints could recycle/remove the checkpoint.redo wal --- file before the oldest replication slot LSN and thus make pg_rewind fail due --- to missing xlog file. - --- Run a checkpoint so that the below sqls won't cause a checkpoint --- until an explicit checkpoint command is issued by the test. --- checkpoint_timeout is by default 300 but the below test should be able to --- finish in 300 seconds. -1: CHECKPOINT; -CHECKPOINT - -0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); - ?column? ----------- - t -(1 row) -1: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 -0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); - ?column? ----------- - t -(1 row) -1: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 -0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); - ?column? ----------- - t -(1 row) -1: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 --- Should be not needed mostly but let's 100% ensure since pg_switch_wal() --- won't switch if it has been on the boundary (seldom though). -0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); - ?column? ----------- - t -(1 row) -1: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 - --- Mark down the primary with content 0 via fts fault injection. -1: SELECT gp_inject_fault_infinite('fts_handle_message', 'error', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; - gp_inject_fault_infinite --------------------------- - Success: -(1 row) - --- Trigger failover and double check. -1: SELECT gp_request_fts_probe_scan(); - gp_request_fts_probe_scan ---------------------------- - t -(1 row) -1: SELECT role, preferred_role from gp_segment_configuration where content = 0; - role | preferred_role -------+---------------- - m | p - p | m -(2 rows) - --- Run two more checkpoints. Previously this causes the checkpoint.redo wal --- file before the oldest replication slot LSN is recycled/removed. -0M: CHECKPOINT; -CHECKPOINT -0M: CHECKPOINT; -CHECKPOINT - --- Wait some seconds until the promotion is done. When the query comes too early, --- the promoted primary is still hot-standby, but we don't support hot-standby now. -2: select pg_sleep(2); - pg_sleep ----------- - -(1 row) - --- Write something (promote adds a 'End Of Recovery' xlog that causes the --- divergence between primary and mirror, but I add a write here so that we --- know that a wal divergence is explicitly triggered and 100% completed. Also --- sanity check the tuple distribution (assumption of the test). -2: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 -2: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id; - gp_segment_id | count ----------------+------- - 0 | 6 - 1 | 6 - 2 | 6 -(3 rows) - --- Ensure that pg_rewind succeeds. Previously it could fail since the divergence --- LSN wal file is missing. -!\retcode gprecoverseg -av; -(exited with code 0) --- In case it fails it should not affect subsequent testing. -!\retcode gprecoverseg -aF; -(exited with code 0) -2: SELECT wait_until_all_segments_synchronized(); - wait_until_all_segments_synchronized --------------------------------------- - OK -(1 row) - --- Test 2 --- primary is abnormally shutdown, but pg_rewind would call single mode --- postgres to ensure it clean shutdown and that causes two checkpoints. - --- See previous comment for why. -3: CHECKPOINT; -CHECKPOINT - -1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); - ?column? ----------- - t -(1 row) -3: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 -1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); - ?column? ----------- - t -(1 row) -3: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 -1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); - ?column? ----------- - t -(1 row) -3: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 --- Should be not needed mostly but let's 100% ensure since pg_switch_wal() --- won't switch if it is on the boundary already (seldom though). -1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); - ?column? ----------- - t -(1 row) -3: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 - --- Hang at checkpointer before writing checkpoint xlog. -3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'suspend', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; - gp_inject_fault ------------------ - Success: -(1 row) -1U&: CHECKPOINT; <waiting ...> -3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; - gp_wait_until_triggered_fault -------------------------------- - Success: -(1 row) - --- Stop the primary immediately and promote the mirror. -3: SELECT pg_ctl(datadir, 'stop', 'immediate') FROM gp_segment_configuration WHERE role='p' AND content = 1; - pg_ctl --------- - OK -(1 row) -3: SELECT gp_request_fts_probe_scan(); - gp_request_fts_probe_scan ---------------------------- - t -(1 row) --- Wait for the end of recovery CHECKPOINT completed after the mirror was promoted -3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'skip', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; - gp_inject_fault ------------------ - Success: -(1 row) -3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1; - gp_wait_until_triggered_fault -------------------------------- - Success: -(1 row) -3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1; - gp_inject_fault ------------------ - Success: -(1 row) -3: SELECT role, preferred_role from gp_segment_configuration where content = 1; - role | preferred_role -------+---------------- - m | p - p | m -(2 rows) - -4: INSERT INTO tst_missing_tbl values(2),(1),(5); -INSERT 3 -4: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id; - gp_segment_id | count ----------------+------- - 1 | 11 - 0 | 11 - 2 | 11 -(3 rows) - --- CHECKPOINT should fail now. -1U<: <... completed> -server closed the connection unexpectedly - This probably means the server terminated abnormally - before or while processing the request. -1Uq: ... <quitting> - --- Ensure that pg_rewind succeeds. For unclean shutdown, there are two --- checkpoints are introduced in pg_rewind when running single-mode postgres --- (one is the checkpoint after crash recovery and another is the shutdown --- checkpoint) and previously the checkpoints clean up the wal files that --- include the previous checkpoint (before divergence LSN) for pg_rewind and --- thus makes gprecoverseg (pg_rewind) fail. -!\retcode gprecoverseg -av; -(exited with code 0) --- In case it fails it should not affect subsequent testing. -!\retcode gprecoverseg -aF; -(exited with code 0) -4: SELECT wait_until_all_segments_synchronized(); - wait_until_all_segments_synchronized --------------------------------------- - OK -(1 row) - --- Cleanup -5: DROP TABLE tst_missing_tbl; -DROP -!\retcode gprecoverseg -ar; -(exited with code 0) -5: SELECT wait_until_all_segments_synchronized(); - wait_until_all_segments_synchronized --------------------------------------- - OK -(1 row) -!\retcode gpconfig -r wal_keep_size; -(exited with code 0) -!\retcode gpstop -ari; -(exited with code 0) diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 7a2c8bc317..c5fa132ef0 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -8,7 +8,6 @@ test: checkpoint_dtx_info test: autovacuum-analyze test: vacuum_skip_locked_onseg test: lockmodes -# test: pg_rewind_fail_missing_xlog test: prepared_xact_deadlock_pg_rewind test: ao_partition_lock test: concurrent_partition_table_operations_should_not_deadlock diff --git a/src/test/isolation2/sql/pg_rewind_fail_missing_xlog.sql b/src/test/isolation2/sql/pg_rewind_fail_missing_xlog.sql deleted file mode 100644 index b0af3910f6..0000000000 --- a/src/test/isolation2/sql/pg_rewind_fail_missing_xlog.sql +++ /dev/null @@ -1,119 +0,0 @@ --- Test the bug that if checkpoint.redo before the oldest replication slot LSN --- is removed/recylced in checkpointer, gprecoverseg (based on pg_rewind) would --- would fail. - -CREATE TABLE tst_missing_tbl (a int); -INSERT INTO tst_missing_tbl values(2),(1),(5); - --- make the test faster. -!\retcode gpconfig -c wal_keep_size -v 128; -!\retcode gpstop -ari; - --- Test 1: primary was marked down by the master but acetually it keeps running --- and previously, checkpoints could recycle/remove the checkpoint.redo wal --- file before the oldest replication slot LSN and thus make pg_rewind fail due --- to missing xlog file. - --- Run a checkpoint so that the below sqls won't cause a checkpoint --- until an explicit checkpoint command is issued by the test. --- checkpoint_timeout is by default 300 but the below test should be able to --- finish in 300 seconds. -1: CHECKPOINT; - -0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); -1: INSERT INTO tst_missing_tbl values(2),(1),(5); -0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); -1: INSERT INTO tst_missing_tbl values(2),(1),(5); -0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); -1: INSERT INTO tst_missing_tbl values(2),(1),(5); --- Should be not needed mostly but let's 100% ensure since pg_switch_wal() --- won't switch if it has been on the boundary (seldom though). -0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); -1: INSERT INTO tst_missing_tbl values(2),(1),(5); - --- Mark down the primary with content 0 via fts fault injection. -1: SELECT gp_inject_fault_infinite('fts_handle_message', 'error', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; - --- Trigger failover and double check. -1: SELECT gp_request_fts_probe_scan(); -1: SELECT role, preferred_role from gp_segment_configuration where content = 0; - --- Run two more checkpoints. Previously this causes the checkpoint.redo wal --- file before the oldest replication slot LSN is recycled/removed. -0M: CHECKPOINT; -0M: CHECKPOINT; - --- Wait some seconds until the promotion is done. When the query comes too early, --- the promoted primary is still hot-standby, but we don't support hot-standby now. -2: select pg_sleep(2); - --- Write something (promote adds a 'End Of Recovery' xlog that causes the --- divergence between primary and mirror, but I add a write here so that we --- know that a wal divergence is explicitly triggered and 100% completed. Also --- sanity check the tuple distribution (assumption of the test). -2: INSERT INTO tst_missing_tbl values(2),(1),(5); -2: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id; - --- Ensure that pg_rewind succeeds. Previously it could fail since the divergence --- LSN wal file is missing. -!\retcode gprecoverseg -av; --- In case it fails it should not affect subsequent testing. -!\retcode gprecoverseg -aF; -2: SELECT wait_until_all_segments_synchronized(); - --- Test 2 --- primary is abnormally shutdown, but pg_rewind would call single mode --- postgres to ensure it clean shutdown and that causes two checkpoints. - --- See previous comment for why. -3: CHECKPOINT; - -1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); -3: INSERT INTO tst_missing_tbl values(2),(1),(5); -1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); -3: INSERT INTO tst_missing_tbl values(2),(1),(5); -1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); -3: INSERT INTO tst_missing_tbl values(2),(1),(5); --- Should be not needed mostly but let's 100% ensure since pg_switch_wal() --- won't switch if it is on the boundary already (seldom though). -1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); -3: INSERT INTO tst_missing_tbl values(2),(1),(5); - --- Hang at checkpointer before writing checkpoint xlog. -3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'suspend', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; -1U&: CHECKPOINT; -3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; - --- Stop the primary immediately and promote the mirror. -3: SELECT pg_ctl(datadir, 'stop', 'immediate') FROM gp_segment_configuration WHERE role='p' AND content = 1; -3: SELECT gp_request_fts_probe_scan(); --- Wait for the end of recovery CHECKPOINT completed after the mirror was promoted -3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'skip', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; -3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1; -3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1; -3: SELECT role, preferred_role from gp_segment_configuration where content = 1; - -4: INSERT INTO tst_missing_tbl values(2),(1),(5); -4: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id; - --- CHECKPOINT should fail now. -1U<: -1Uq: - --- Ensure that pg_rewind succeeds. For unclean shutdown, there are two --- checkpoints are introduced in pg_rewind when running single-mode postgres --- (one is the checkpoint after crash recovery and another is the shutdown --- checkpoint) and previously the checkpoints clean up the wal files that --- include the previous checkpoint (before divergence LSN) for pg_rewind and --- thus makes gprecoverseg (pg_rewind) fail. -!\retcode gprecoverseg -av; --- In case it fails it should not affect subsequent testing. -!\retcode gprecoverseg -aF; -4: SELECT wait_until_all_segments_synchronized(); - --- Cleanup -5: DROP TABLE tst_missing_tbl; -!\retcode gprecoverseg -ar; -5: SELECT wait_until_all_segments_synchronized(); -!\retcode gpconfig -r wal_keep_size; -!\retcode gpstop -ari; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
