From cae8ef31feb4c8dc8beb3c7422548955455a35c9 Mon Sep 17 00:00:00 2001
From: "suyu.cmj" <mengjuan.cmj@alibaba-inc.com>
Date: Fri, 7 Jul 2023 08:44:49 +0000
Subject: [PATCH] Fix the bug of a 2PC transaction maybe recovered twice

During recovery, a two-phase transaction should be restored
either from the disk file or from the WAL. However, a two-phase
transaction maybe restored from both way if we crashed during
doing a checkpoint. Considering that the data on disk file may
not be reliable, so in this case, we only store the transaction
data recorded in the WAL.
---
 src/backend/access/transam/twophase.c | 61 +++++++++++++++++++++------
 1 file changed, 49 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index d145836a79..629b79401c 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -2491,6 +2491,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
 	char	   *bufptr;
 	const char *gid;
 	GlobalTransaction gxact;
+	bool addnewgxact = true;
+	int i;
 
 	Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
 	Assert(RecoveryInProgress());
@@ -2509,15 +2511,54 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
 	 * that it got added in the redo phase
 	 */
 
+	/*
+	 * During recovery, the two-phase data can be added in two ways:
+	 * 1) restored from disk file when its xid < checkPoint.nextxid,
+	 * 2) restored from the WAL when its prepare_start_lsn > checkPoint.redo,
+	 * A two-phase transaction may satisfy above two conditions if we
+	 * crashed during doing a checkpoint. Considering that the data
+	 * on disk file may not be reliable, so in this case, we only store
+	 * the transaction data recorded in the WAL.
+	 */
+	for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+	{
+		GlobalTransaction curxact = TwoPhaseState->prepXacts[i];
+
+		if (curxact->xid == hdr->xid)
+		{
+			if (curxact->ondisk && !XLogRecPtrIsInvalid(start_lsn))
+			{
+				gxact = curxact;
+				addnewgxact = false;
+				ereport(WARNING,
+						(errmsg("found duplicate two-phase transaction:%u, store data from the WAL",
+								hdr->xid)));
+				break;
+			}
+			else
+				ereport(ERROR,
+						(errmsg("found unexpected duplicate two-phase transaction:%u, check for data correctness.",
+								hdr->xid)));
+		}
+	}
+
 	/* Get a free gxact from the freelist */
-	if (TwoPhaseState->freeGXacts == NULL)
-		ereport(ERROR,
-				(errcode(ERRCODE_OUT_OF_MEMORY),
-				 errmsg("maximum number of prepared transactions reached"),
-				 errhint("Increase max_prepared_transactions (currently %d).",
-						 max_prepared_xacts)));
-	gxact = TwoPhaseState->freeGXacts;
-	TwoPhaseState->freeGXacts = gxact->next;
+	if (addnewgxact)
+	{
+		if (TwoPhaseState->freeGXacts == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("maximum number of prepared transactions reached"),
+					 errhint("Increase max_prepared_transactions (currently %d).",
+							 max_prepared_xacts)));
+
+		gxact = TwoPhaseState->freeGXacts;
+		TwoPhaseState->freeGXacts = gxact->next;
+
+		/* And insert it into the active array */
+		Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts);
+		TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact;
+	}
 
 	gxact->prepared_at = hdr->prepared_at;
 	gxact->prepare_start_lsn = start_lsn;
@@ -2530,10 +2571,6 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
 	gxact->inredo = true;		/* yes, added in redo */
 	strcpy(gxact->gid, gid);
 
-	/* And insert it into the active array */
-	Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts);
-	TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact;
-
 	if (origin_id != InvalidRepOriginId)
 	{
 		/* recover apply progress */
-- 
2.19.1.6.gb485710b

