On 2021/03/03 17:46, Heikki Linnakangas wrote:

> I think it should be reset even earlier, inside XlogReadTwoPhaseData()
> probably. With your patch, doesn't the LogStandbySnapshot() call just
> above where you're ressetting ThisTimeLineID also write a WAL record
> with incorrect timeline?

Agreed.

On Wed, Mar 3, 2021 at 1:04 AM Fujii Masao <masao.fu...@oss.nttdata.com> wrote:

> > Even better, can we avoid setting ThisTimeLineID in XlogReadTwoPhaseData() 
> > in the first place?
>
>
>
> Or isn't it better to reset ThisTimeLineID in read_local_xlog_page(), i.e.,
> prevent read_local_xlog_page() from changing ThisTimeLineID? I'm not
> sure if that's possible, though.. In the future other functions that calls
> read_local_xlog_page() during the promotion may appear. Fixing the issue
> outside read_local_xlog_page() may cause those functions to get
> the same issue.

I agree. We should fix the issue in read_local_xlog_page(). I have
attached two different patches which do so:
saved_ThisTimeLineID.patch and pass_ThisTimeLineID.patch.

The former saves the value of the ThisTimeLineID before it gets changed
in read_local_xlog_page() and resets it after ThisTimeLineID has been
used later on in the code (by XLogReadDetermineTimeline()).

The latter removes occurrences of ThisTimeLineID from
XLogReadDetermineTimeline() and introduces an argument currTLI to
XLogReadDetermineTimeline() to be used in its stead.

Regards,
Soumyadeep
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index d17d660f460..a96fd236eeb 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -680,11 +680,12 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
  * The caller must also make sure it doesn't read past the current replay
  * position (using GetXLogReplayRecPtr) if executing in recovery, so it
  * doesn't fail to notice that the current timeline became historical. The
- * caller must also update ThisTimeLineID with the result of
+ * caller must also ensure that currTLI is updated with the result of
  * GetXLogReplayRecPtr and must check RecoveryInProgress().
  */
 void
-XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wantLength)
+XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage,
+						  uint32 wantLength, TimeLineID currTLI)
 {
 	const XLogRecPtr lastReadPage = (state->seg.ws_segno *
 									 state->segcxt.ws_segsize + state->segoff);
@@ -712,12 +713,12 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
 	 * just carry on. (Seeking backwards requires a check to make sure the
 	 * older page isn't on a prior timeline).
 	 *
-	 * ThisTimeLineID might've become historical since we last looked, but the
+	 * currTLI might be historical, but the
 	 * caller is required not to read past the flush limit it saw at the time
 	 * it looked up the timeline. There's nothing we can do about it if
 	 * StartupXLOG() renames it to .partial concurrently.
 	 */
-	if (state->currTLI == ThisTimeLineID && wantPage >= lastReadPage)
+	if (state->currTLI == currTLI && wantPage >= lastReadPage)
 	{
 		Assert(state->currTLIValidUntil == InvalidXLogRecPtr);
 		return;
@@ -729,7 +730,7 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
 	 * the current segment we can just keep reading.
 	 */
 	if (state->currTLIValidUntil != InvalidXLogRecPtr &&
-		state->currTLI != ThisTimeLineID &&
+		state->currTLI != currTLI &&
 		state->currTLI != 0 &&
 		((wantPage + wantLength) / state->segcxt.ws_segsize) <
 		(state->currTLIValidUntil / state->segcxt.ws_segsize))
@@ -752,7 +753,7 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
 		 * We need to re-read the timeline history in case it's been changed
 		 * by a promotion or replay from a cascaded replica.
 		 */
-		List	   *timelineHistory = readTimeLineHistory(ThisTimeLineID);
+		List	   *timelineHistory = readTimeLineHistory(currTLI);
 		XLogRecPtr	endOfSegment;
 
 		endOfSegment = ((wantPage / state->segcxt.ws_segsize) + 1) *
@@ -830,7 +831,7 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 {
 	XLogRecPtr	read_upto,
 				loc;
-	TimeLineID	tli;
+	TimeLineID	tli = ThisTimeLineID;
 	int			count;
 	WALReadError errinfo;
 
@@ -850,8 +851,7 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 		if (!RecoveryInProgress())
 			read_upto = GetFlushRecPtr();
 		else
-			read_upto = GetXLogReplayRecPtr(&ThisTimeLineID);
-		tli = ThisTimeLineID;
+			read_upto = GetXLogReplayRecPtr(&tli);
 
 		/*
 		 * Check which timeline to get the record from.
@@ -877,9 +877,9 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 		 * standby whose primary gets promoted while we're decoding, so a
 		 * one-off ERROR isn't too bad.
 		 */
-		XLogReadDetermineTimeline(state, targetPagePtr, reqLen);
+		XLogReadDetermineTimeline(state, targetPagePtr, reqLen, tli);
 
-		if (state->currTLI == ThisTimeLineID)
+		if (state->currTLI == tli)
 		{
 
 			if (loc <= read_upto)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 23baa4498af..bbff969ab52 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -815,7 +815,7 @@ logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int req
 	WALReadError errinfo;
 	XLogSegNo	segno;
 
-	XLogReadDetermineTimeline(state, targetPagePtr, reqLen);
+	XLogReadDetermineTimeline(state, targetPagePtr, reqLen, ThisTimeLineID);
 	sendTimeLineIsHistoric = (state->currTLI != ThisTimeLineID);
 	sendTimeLine = state->currTLI;
 	sendTimeLineValidUpto = state->currTLIValidUntil;
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 9ac602b674d..5c56c6beac3 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -56,7 +56,8 @@ extern void wal_segment_open(XLogReaderState *state,
 extern void wal_segment_close(XLogReaderState *state);
 
 extern void XLogReadDetermineTimeline(XLogReaderState *state,
-									  XLogRecPtr wantPage, uint32 wantLength);
+									  XLogRecPtr wantPage, uint32 wantLength,
+									  TimeLineID currTLI);
 
 extern void WALReadRaiseError(WALReadError *errinfo);
 
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index d17d660f460..9064fa19837 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -831,6 +831,7 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 	XLogRecPtr	read_upto,
 				loc;
 	TimeLineID	tli;
+	TimeLineID	saved_ThisTimeLineID = ThisTimeLineID;
 	int			count;
 	WALReadError errinfo;
 
@@ -915,6 +916,13 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
 		}
 	}
 
+	/*
+	 * We want subsequent writes to go to the latest timeline. Thus, resync
+	 * ThisTimeLineID, which may now be pointing to an older timeline, if we are
+	 * currently in recovery.
+	 */
+	ThisTimeLineID = saved_ThisTimeLineID;
+
 	if (targetPagePtr + XLOG_BLCKSZ <= read_upto)
 	{
 		/*

Reply via email to