From b6cb833a6fd4d4e0c87d3e4e3fce30910eba874a Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Sun, 2 Apr 2023 03:26:35 +0300
Subject: [PATCH v4 1/2] Revise changes in 764da7710b and 11470f544e

The mentioned commits evade extra tuple re-fetching and allow locking last
versions of updated tuples in tuple_update() and tuple_delete().

This commit improves the things in the following ways.
 * tuple_update() and tuple_delete() can now fetch the old tuple version not
   only after lock, but also after successful UPDATE/DELETE.
 * Avoid tuple re-fetching in more general case including after row triggers.
 * Improve code API in heap_lock_tuple.  Automatically detect prefetched tuple
   in slot to avoid the extra work on reading the buffer.
 * Remove LazyTupleTableSlot.  It seems unreasonable complication for evading
   slot allocation, which is quite cheap.

Reported-by: Andres Freund
Discussion: https://postgr.es/m/20230323003003.plgaxjqahjgkuxrk%40awork3.anarazel.de
---
 src/backend/access/heap/heapam.c         | 205 +++++++++++++-----
 src/backend/access/heap/heapam_handler.c | 143 +++++--------
 src/backend/access/table/tableam.c       |  24 ++-
 src/backend/commands/trigger.c           |  55 ++---
 src/backend/executor/execReplication.c   |  19 +-
 src/backend/executor/nodeModifyTable.c   | 253 +++++++++--------------
 src/include/access/heapam.h              |  19 +-
 src/include/access/tableam.h             |  67 ++++--
 src/include/commands/trigger.h           |   4 +-
 src/include/executor/tuptable.h          |  38 ----
 src/tools/pgindent/typedefs.list         |   2 -
 11 files changed, 418 insertions(+), 411 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index f7d9ce59a47..3f1f23997fb 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2449,10 +2449,11 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
 }
 
 /*
- *	heap_delete - delete a tuple
+ *	heap_delete - delete a tuple, optionally fetching it into a slot
  *
  * See table_tuple_delete() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a tuple rather than a slot.  Also, we don't
+ * place a lock on the tuple in this function, just fetch the existing version.
  *
  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
  * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -2461,8 +2462,9 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
  */
 TM_Result
 heap_delete(Relation relation, ItemPointer tid,
-			CommandId cid, Snapshot crosscheck, bool wait,
-			TM_FailureData *tmfd, bool changingPart)
+			CommandId cid, Snapshot crosscheck, int options,
+			TM_FailureData *tmfd, bool changingPart,
+			TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TransactionId xid = GetCurrentTransactionId();
@@ -2540,7 +2542,7 @@ l1:
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("attempted to delete invisible tuple")));
 	}
-	else if (result == TM_BeingModified && wait)
+	else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
 	{
 		TransactionId xwait;
 		uint16		infomask;
@@ -2676,7 +2678,30 @@ l1:
 			tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
 		else
 			tmfd->cmax = InvalidCommandId;
-		UnlockReleaseBuffer(buffer);
+
+		/*
+		 * If we're asked to lock the updated tuple, we just fetch the
+		 * existing tuple.  That let's the caller save some resources on
+		 * placing the lock.
+		 */
+		if (result == TM_Updated &&
+			(options & TABLE_MODIFY_LOCK_UPDATED))
+		{
+			BufferHeapTupleTableSlot *bslot;
+
+			Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+			bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+			bslot->base.tupdata = tp;
+			ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+										   oldSlot,
+										   buffer);
+		}
+		else
+		{
+			UnlockReleaseBuffer(buffer);
+		}
 		if (have_tuple_lock)
 			UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
 		if (vmbuffer != InvalidBuffer)
@@ -2850,8 +2875,24 @@ l1:
 	 */
 	CacheInvalidateHeapTuple(relation, &tp, NULL);
 
-	/* Now we can release the buffer */
-	ReleaseBuffer(buffer);
+	/* Fetch the old tuple version if we're asked for that. */
+	if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
+	{
+		BufferHeapTupleTableSlot *bslot;
+
+		Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+		bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+		bslot->base.tupdata = tp;
+		ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+									   oldSlot,
+									   buffer);
+	}
+	else
+	{
+		/* Now we can release the buffer */
+		ReleaseBuffer(buffer);
+	}
 
 	/*
 	 * Release the lmgr tuple lock, if we had it.
@@ -2883,8 +2924,8 @@ simple_heap_delete(Relation relation, ItemPointer tid)
 
 	result = heap_delete(relation, tid,
 						 GetCurrentCommandId(true), InvalidSnapshot,
-						 true /* wait for commit */ ,
-						 &tmfd, false /* changingPart */ );
+						 TABLE_MODIFY_WAIT /* wait for commit */ ,
+						 &tmfd, false /* changingPart */ , NULL);
 	switch (result)
 	{
 		case TM_SelfModified:
@@ -2911,10 +2952,11 @@ simple_heap_delete(Relation relation, ItemPointer tid)
 }
 
 /*
- *	heap_update - replace a tuple
+ *	heap_update - replace a tuple, optionally fetching it into a slot
  *
  * See table_tuple_update() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a tuple rather than a slot.  Also, we don't
+ * place a lock on the tuple in this function, just fetch the existing version.
  *
  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
  * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -2923,9 +2965,9 @@ simple_heap_delete(Relation relation, ItemPointer tid)
  */
 TM_Result
 heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
-			CommandId cid, Snapshot crosscheck, bool wait,
+			CommandId cid, Snapshot crosscheck, int options,
 			TM_FailureData *tmfd, LockTupleMode *lockmode,
-			TU_UpdateIndexes *update_indexes)
+			TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TransactionId xid = GetCurrentTransactionId();
@@ -3102,7 +3144,7 @@ l2:
 	result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
 
 	/* see below about the "no wait" case */
-	Assert(result != TM_BeingModified || wait);
+	Assert(result != TM_BeingModified || (options & TABLE_MODIFY_WAIT));
 
 	if (result == TM_Invisible)
 	{
@@ -3111,7 +3153,7 @@ l2:
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("attempted to update invisible tuple")));
 	}
-	else if (result == TM_BeingModified && wait)
+	else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
 	{
 		TransactionId xwait;
 		uint16		infomask;
@@ -3313,7 +3355,30 @@ l2:
 			tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
 		else
 			tmfd->cmax = InvalidCommandId;
-		UnlockReleaseBuffer(buffer);
+
+		/*
+		 * If we're asked to lock the updated tuple, we just fetch the
+		 * existing tuple.  That let's the caller save some resouces on
+		 * placing the lock.
+		 */
+		if (result == TM_Updated &&
+			(options & TABLE_MODIFY_LOCK_UPDATED))
+		{
+			BufferHeapTupleTableSlot *bslot;
+
+			Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+			bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+			bslot->base.tupdata = oldtup;
+			ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+										   oldSlot,
+										   buffer);
+		}
+		else
+		{
+			UnlockReleaseBuffer(buffer);
+		}
 		if (have_tuple_lock)
 			UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
 		if (vmbuffer != InvalidBuffer)
@@ -3791,7 +3856,26 @@ l2:
 	/* Now we can release the buffer(s) */
 	if (newbuf != buffer)
 		ReleaseBuffer(newbuf);
-	ReleaseBuffer(buffer);
+
+	/* Fetch the old tuple version if we're asked for that. */
+	if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
+	{
+		BufferHeapTupleTableSlot *bslot;
+
+		Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+		bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+		bslot->base.tupdata = oldtup;
+		ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+									   oldSlot,
+									   buffer);
+	}
+	else
+	{
+		/* Now we can release the buffer */
+		ReleaseBuffer(buffer);
+	}
+
 	if (BufferIsValid(vmbuffer_new))
 		ReleaseBuffer(vmbuffer_new);
 	if (BufferIsValid(vmbuffer))
@@ -3999,8 +4083,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup,
 
 	result = heap_update(relation, otid, tup,
 						 GetCurrentCommandId(true), InvalidSnapshot,
-						 true /* wait for commit */ ,
-						 &tmfd, &lockmode, update_indexes);
+						 TABLE_MODIFY_WAIT /* wait for commit */ ,
+						 &tmfd, &lockmode, update_indexes, NULL);
 	switch (result)
 	{
 		case TM_SelfModified:
@@ -4063,12 +4147,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  *		tuples.
  *
  * Output parameters:
- *	*tuple: all fields filled in
- *	*buffer: set to buffer holding tuple (pinned but not locked at exit)
+ *	*slot: BufferHeapTupleTableSlot filled with tuple
  *	*tmfd: filled in failure cases (see below)
  *
  * Function results are the same as the ones for table_tuple_lock().
  *
+ * If *slot already contains the target tuple, it takes advantage on that by
+ * skipping the ReadBuffer() call.
+ *
  * In the failure cases other than TM_Invisible, the routine fills
  * *tmfd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
  * if necessary), and t_cmax (the last only for TM_SelfModified,
@@ -4079,15 +4165,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  * See README.tuplock for a thorough explanation of this mechanism.
  */
 TM_Result
-heap_lock_tuple(Relation relation, HeapTuple tuple,
+heap_lock_tuple(Relation relation, ItemPointer tid, TupleTableSlot *slot,
 				CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
-				bool follow_updates,
-				Buffer *buffer, TM_FailureData *tmfd)
+				bool follow_updates, TM_FailureData *tmfd)
 {
 	TM_Result	result;
-	ItemPointer tid = &(tuple->t_self);
 	ItemId		lp;
 	Page		page;
+	Buffer		buffer;
 	Buffer		vmbuffer = InvalidBuffer;
 	BlockNumber block;
 	TransactionId xid,
@@ -4099,8 +4184,24 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	bool		skip_tuple_lock = false;
 	bool		have_tuple_lock = false;
 	bool		cleared_all_frozen = false;
+	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+	HeapTuple	tuple = &bslot->base.tupdata;
+
+	Assert(TTS_IS_BUFFERTUPLE(slot));
 
-	*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+	/* Take advantage if slot already contains the relevant tuple  */
+	if (!TTS_EMPTY(slot) &&
+		slot->tts_tableOid == relation->rd_id &&
+		ItemPointerCompare(&slot->tts_tid, tid) == 0 &&
+		BufferIsValid(bslot->buffer))
+	{
+		buffer = bslot->buffer;
+		IncrBufferRefCount(buffer);
+	}
+	else
+	{
+		buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+	}
 	block = ItemPointerGetBlockNumber(tid);
 
 	/*
@@ -4109,21 +4210,22 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	 * in the middle of changing this, so we'll need to recheck after we have
 	 * the lock.
 	 */
-	if (PageIsAllVisible(BufferGetPage(*buffer)))
+	if (PageIsAllVisible(BufferGetPage(buffer)))
 		visibilitymap_pin(relation, block, &vmbuffer);
 
-	LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
-	page = BufferGetPage(*buffer);
+	page = BufferGetPage(buffer);
 	lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
 	Assert(ItemIdIsNormal(lp));
 
+	tuple->t_self = *tid;
 	tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
 	tuple->t_len = ItemIdGetLength(lp);
 	tuple->t_tableOid = RelationGetRelid(relation);
 
 l3:
-	result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
+	result = HeapTupleSatisfiesUpdate(tuple, cid, buffer);
 
 	if (result == TM_Invisible)
 	{
@@ -4152,7 +4254,7 @@ l3:
 		infomask2 = tuple->t_data->t_infomask2;
 		ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
 
-		LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 		/*
 		 * If any subtransaction of the current top transaction already holds
@@ -4304,12 +4406,12 @@ l3:
 					{
 						result = res;
 						/* recovery code expects to have buffer lock held */
-						LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+						LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 						goto failed;
 					}
 				}
 
-				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 				/*
 				 * Make sure it's still an appropriate lock, else start over.
@@ -4344,7 +4446,7 @@ l3:
 			if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
 				!HEAP_XMAX_IS_EXCL_LOCKED(infomask))
 			{
-				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 				/*
 				 * Make sure it's still an appropriate lock, else start over.
@@ -4372,7 +4474,7 @@ l3:
 					 * No conflict, but if the xmax changed under us in the
 					 * meantime, start over.
 					 */
-					LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+					LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 					if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
 						!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
 											 xwait))
@@ -4384,7 +4486,7 @@ l3:
 			}
 			else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
 			{
-				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 				/* if the xmax changed in the meantime, start over */
 				if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
@@ -4412,7 +4514,7 @@ l3:
 			TransactionIdIsCurrentTransactionId(xwait))
 		{
 			/* ... but if the xmax changed in the meantime, start over */
-			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 			if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
 				!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
 									 xwait))
@@ -4434,7 +4536,7 @@ l3:
 		 */
 		if (require_sleep && (result == TM_Updated || result == TM_Deleted))
 		{
-			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 			goto failed;
 		}
 		else if (require_sleep)
@@ -4459,7 +4561,7 @@ l3:
 				 */
 				result = TM_WouldBlock;
 				/* recovery code expects to have buffer lock held */
-				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 				goto failed;
 			}
 
@@ -4485,7 +4587,7 @@ l3:
 						{
 							result = TM_WouldBlock;
 							/* recovery code expects to have buffer lock held */
-							LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+							LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 							goto failed;
 						}
 						break;
@@ -4525,7 +4627,7 @@ l3:
 						{
 							result = TM_WouldBlock;
 							/* recovery code expects to have buffer lock held */
-							LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+							LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 							goto failed;
 						}
 						break;
@@ -4551,12 +4653,12 @@ l3:
 				{
 					result = res;
 					/* recovery code expects to have buffer lock held */
-					LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+					LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 					goto failed;
 				}
 			}
 
-			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 			/*
 			 * xwait is done, but if xwait had just locked the tuple then some
@@ -4578,7 +4680,7 @@ l3:
 				 * don't check for this in the multixact case, because some
 				 * locker transactions might still be running.
 				 */
-				UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
+				UpdateXmaxHintBits(tuple->t_data, buffer, xwait);
 			}
 		}
 
@@ -4637,9 +4739,9 @@ failed:
 	 */
 	if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
 	{
-		LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		visibilitymap_pin(relation, block, &vmbuffer);
-		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 		goto l3;
 	}
 
@@ -4702,7 +4804,7 @@ failed:
 		cleared_all_frozen = true;
 
 
-	MarkBufferDirty(*buffer);
+	MarkBufferDirty(buffer);
 
 	/*
 	 * XLOG stuff.  You might think that we don't need an XLOG record because
@@ -4722,7 +4824,7 @@ failed:
 		XLogRecPtr	recptr;
 
 		XLogBeginInsert();
-		XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
+		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 
 		xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
 		xlrec.locking_xid = xid;
@@ -4743,7 +4845,7 @@ failed:
 	result = TM_Ok;
 
 out_locked:
-	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 out_unlocked:
 	if (BufferIsValid(vmbuffer))
@@ -4761,6 +4863,9 @@ out_unlocked:
 	if (have_tuple_lock)
 		UnlockTupleTuplock(relation, tid, mode);
 
+	/* Put the target tuple to the slot */
+	ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
+
 	return result;
 }
 
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 97b5daee920..5781096203d 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -45,11 +45,11 @@
 #include "utils/builtins.h"
 #include "utils/rel.h"
 
-static TM_Result heapam_tuple_lock_internal(Relation relation, ItemPointer tid,
-											Snapshot snapshot, TupleTableSlot *slot,
-											CommandId cid, LockTupleMode mode,
-											LockWaitPolicy wait_policy, uint8 flags,
-											TM_FailureData *tmfd, bool updated);
+static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid,
+								   Snapshot snapshot, TupleTableSlot *slot,
+								   CommandId cid, LockTupleMode mode,
+								   LockWaitPolicy wait_policy, uint8 flags,
+								   TM_FailureData *tmfd);
 
 static void reform_and_rewrite_tuple(HeapTuple tuple,
 									 Relation OldHeap, Relation NewHeap,
@@ -304,9 +304,9 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
 
 static TM_Result
 heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
-					Snapshot snapshot, Snapshot crosscheck, bool wait,
+					Snapshot snapshot, Snapshot crosscheck, int options,
 					TM_FailureData *tmfd, bool changingPart,
-					LazyTupleTableSlot *lockedSlot)
+					TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 
@@ -315,33 +315,32 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
 	 * the storage itself is cleaning the dead tuples by itself, it is the
 	 * time to call the index tuple deletion also.
 	 */
-	result = heap_delete(relation, tid, cid, crosscheck, wait,
-						 tmfd, changingPart);
+	result = heap_delete(relation, tid, cid, crosscheck, options,
+						 tmfd, changingPart, oldSlot);
 
 	/*
 	 * If the tuple has been concurrently updated, then get the lock on it.
-	 * (Do this if caller asked for tat by providing a 'lockedSlot'.) With the
-	 * lock held retry of delete should succeed even if there are more
-	 * concurrent update attempts.
+	 * (Do only if caller asked for this by setting the
+	 * TABLE_MODIFY_LOCK_UPDATED option)  With the lock held retry of the
+	 * delete should succeed even if there are more concurrent update
+	 * attempts.
 	 */
-	if (result == TM_Updated && lockedSlot)
+	if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
 	{
-		TupleTableSlot *evalSlot;
-
-		Assert(wait);
-
-		evalSlot = LAZY_TTS_EVAL(lockedSlot);
-		result = heapam_tuple_lock_internal(relation, tid, snapshot,
-											evalSlot, cid, LockTupleExclusive,
-											LockWaitBlock,
-											TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											tmfd, true);
+		/*
+		 * heapam_tuple_lock() will take advantage of tuple loaded into
+		 * oldSlot by heap_delete().
+		 */
+		result = heapam_tuple_lock(relation, tid, snapshot,
+								   oldSlot, cid, LockTupleExclusive,
+								   (options & TABLE_MODIFY_WAIT) ?
+								   LockWaitBlock :
+								   LockWaitSkip,
+								   TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+								   tmfd);
 
 		if (result == TM_Ok)
-		{
-			tmfd->traversed = true;
 			return TM_Updated;
-		}
 	}
 
 	return result;
@@ -351,9 +350,9 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
 static TM_Result
 heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 					CommandId cid, Snapshot snapshot, Snapshot crosscheck,
-					bool wait, TM_FailureData *tmfd,
+					int options, TM_FailureData *tmfd,
 					LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
-					LazyTupleTableSlot *lockedSlot)
+					TupleTableSlot *oldSlot)
 {
 	bool		shouldFree = true;
 	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
@@ -363,8 +362,8 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 	slot->tts_tableOid = RelationGetRelid(relation);
 	tuple->t_tableOid = slot->tts_tableOid;
 
-	result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
-						 tmfd, lockmode, update_indexes);
+	result = heap_update(relation, otid, tuple, cid, crosscheck, options,
+						 tmfd, lockmode, update_indexes, oldSlot);
 	ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
 
 	/*
@@ -373,8 +372,8 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 	 * Note: heap_update returns the tid (location) of the new tuple in the
 	 * t_self field.
 	 *
-	 * If the update is not HOT, we must update all indexes. If the update
-	 * is HOT, it could be that we updated summarized columns, so we either
+	 * If the update is not HOT, we must update all indexes. If the update is
+	 * HOT, it could be that we updated summarized columns, so we either
 	 * update only summarized indexes, or none at all.
 	 */
 	if (result != TM_Ok)
@@ -393,28 +392,27 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 
 	/*
 	 * If the tuple has been concurrently updated, then get the lock on it.
-	 * (Do this if caller asked for tat by providing a 'lockedSlot'.) With the
-	 * lock held retry of update should succeed even if there are more
-	 * concurrent update attempts.
+	 * (Do only if caller asked for this by setting the
+	 * TABLE_MODIFY_LOCK_UPDATED option)  With the lock held retry of the
+	 * update should succeed even if there are more concurrent update
+	 * attempts.
 	 */
-	if (result == TM_Updated && lockedSlot)
+	if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
 	{
-		TupleTableSlot *evalSlot;
-
-		Assert(wait);
-
-		evalSlot = LAZY_TTS_EVAL(lockedSlot);
-		result = heapam_tuple_lock_internal(relation, otid, snapshot,
-											evalSlot, cid, *lockmode,
-											LockWaitBlock,
-											TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											tmfd, true);
+		/*
+		 * heapam_tuple_lock() will take advantage of tuple loaded into
+		 * oldSlot by heap_update().
+		 */
+		result = heapam_tuple_lock(relation, otid, snapshot,
+								   oldSlot, cid, *lockmode,
+								   (options & TABLE_MODIFY_WAIT) ?
+								   LockWaitBlock :
+								   LockWaitSkip,
+								   TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+								   tmfd);
 
 		if (result == TM_Ok)
-		{
-			tmfd->traversed = true;
 			return TM_Updated;
-		}
 	}
 
 	return result;
@@ -425,26 +423,9 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 				  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 				  LockWaitPolicy wait_policy, uint8 flags,
 				  TM_FailureData *tmfd)
-{
-	return heapam_tuple_lock_internal(relation, tid, snapshot, slot, cid,
-									  mode, wait_policy, flags, tmfd, false);
-}
-
-/*
- * This routine does the work for heapam_tuple_lock(), but also support
- * `updated` argument to re-use the work done by heapam_tuple_update() or
- * heapam_tuple_delete() on figuring out that tuple was concurrently updated.
- */
-static TM_Result
-heapam_tuple_lock_internal(Relation relation, ItemPointer tid,
-						   Snapshot snapshot, TupleTableSlot *slot,
-						   CommandId cid, LockTupleMode mode,
-						   LockWaitPolicy wait_policy, uint8 flags,
-						   TM_FailureData *tmfd, bool updated)
 {
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	TM_Result	result;
-	Buffer		buffer = InvalidBuffer;
 	HeapTuple	tuple = &bslot->base.tupdata;
 	bool		follow_updates;
 
@@ -454,27 +435,14 @@ heapam_tuple_lock_internal(Relation relation, ItemPointer tid,
 	Assert(TTS_IS_BUFFERTUPLE(slot));
 
 tuple_lock_retry:
-	tuple->t_self = *tid;
-	if (!updated)
-		result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
-								 follow_updates, &buffer, tmfd);
-	else
-		result = TM_Updated;
+	result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy,
+							 follow_updates, tmfd);
 
 	if (result == TM_Updated &&
 		(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
 	{
-		if (!updated)
-		{
-			/* Should not encounter speculative tuple on recheck */
-			Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
-
-			ReleaseBuffer(buffer);
-		}
-		else
-		{
-			updated = false;
-		}
+		/* Should not encounter speculative tuple on recheck */
+		Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
 
 		if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
 		{
@@ -497,6 +465,8 @@ tuple_lock_retry:
 			InitDirtySnapshot(SnapshotDirty);
 			for (;;)
 			{
+				Buffer		buffer = InvalidBuffer;
+
 				if (ItemPointerIndicatesMovedPartitions(tid))
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
@@ -591,7 +561,7 @@ tuple_lock_retry:
 					/*
 					 * This is a live tuple, so try to lock it again.
 					 */
-					ReleaseBuffer(buffer);
+					ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
 					goto tuple_lock_retry;
 				}
 
@@ -602,7 +572,7 @@ tuple_lock_retry:
 				 */
 				if (tuple->t_data == NULL)
 				{
-					Assert(!BufferIsValid(buffer));
+					ReleaseBuffer(buffer);
 					return TM_Deleted;
 				}
 
@@ -655,9 +625,6 @@ tuple_lock_retry:
 	slot->tts_tableOid = RelationGetRelid(relation);
 	tuple->t_tableOid = slot->tts_tableOid;
 
-	/* store in slot, transferring existing pin */
-	ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
-
 	return result;
 }
 
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 2a1a6ced3c7..0167a2fb1fc 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -297,17 +297,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
  * via ereport().
  */
 void
-simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
+simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot,
+						  TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TM_FailureData tmfd;
+	int			options = TABLE_MODIFY_WAIT;	/* wait for commit */
+
+	/* Fetch old tuple if the relevant slot is provided */
+	if (oldSlot)
+		options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
 
 	result = table_tuple_delete(rel, tid,
 								GetCurrentCommandId(true),
 								snapshot, InvalidSnapshot,
-								true /* wait for commit */ ,
+								options,
 								&tmfd, false /* changingPart */ ,
-								NULL);
+								oldSlot);
 
 	switch (result)
 	{
@@ -346,18 +352,24 @@ void
 simple_table_tuple_update(Relation rel, ItemPointer otid,
 						  TupleTableSlot *slot,
 						  Snapshot snapshot,
-						  TU_UpdateIndexes *update_indexes)
+						  TU_UpdateIndexes *update_indexes,
+						  TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TM_FailureData tmfd;
 	LockTupleMode lockmode;
+	int			options = TABLE_MODIFY_WAIT;	/* wait for commit */
+
+	/* Fetch old tuple if the relevant slot is provided */
+	if (oldSlot)
+		options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
 
 	result = table_tuple_update(rel, otid, slot,
 								GetCurrentCommandId(true),
 								snapshot, InvalidSnapshot,
-								true /* wait for commit */ ,
+								options,
 								&tmfd, &lockmode, update_indexes,
-								NULL);
+								oldSlot);
 
 	switch (result)
 	{
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 0f2de7e2e01..5c4f8bae188 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2784,8 +2784,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 void
 ExecARDeleteTriggers(EState *estate,
 					 ResultRelInfo *relinfo,
-					 ItemPointer tupleid,
 					 HeapTuple fdw_trigtuple,
+					 TupleTableSlot *slot,
 					 TransitionCaptureState *transition_capture,
 					 bool is_crosspart_update)
 {
@@ -2794,20 +2794,11 @@ ExecARDeleteTriggers(EState *estate,
 	if ((trigdesc && trigdesc->trig_delete_after_row) ||
 		(transition_capture && transition_capture->tcs_delete_old_table))
 	{
-		TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
-
-		Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
-		if (fdw_trigtuple == NULL)
-			GetTupleForTrigger(estate,
-							   NULL,
-							   relinfo,
-							   tupleid,
-							   LockTupleExclusive,
-							   slot,
-							   NULL,
-							   NULL,
-							   NULL);
-		else
+		/*
+		 * Put the FDW old tuple to the slot.  Otherwise, caller is expected
+		 * to have old tuple alredy fetched to the slot.
+		 */
+		if (fdw_trigtuple != NULL)
 			ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
 
 		AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -3080,18 +3071,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
  * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
  * and destination partitions, respectively, of a cross-partition update of
  * the root partitioned table mentioned in the query, given by 'relinfo'.
- * 'tupleid' in that case refers to the ctid of the "old" tuple in the source
- * partition, and 'newslot' contains the "new" tuple in the destination
- * partition.  This interface allows to support the requirements of
- * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
- * that case.
+ * 'oldslot' contains the "old" tuple in the source partition, and 'newslot'
+ * contains the "new" tuple in the destination partition.  This interface
+ * allows to support the requirements of ExecCrossPartitionUpdateForeignKey();
+ * is_crosspart_update must be true in that case.
  */
 void
 ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 					 ResultRelInfo *src_partinfo,
 					 ResultRelInfo *dst_partinfo,
-					 ItemPointer tupleid,
 					 HeapTuple fdw_trigtuple,
+					 TupleTableSlot *oldslot,
 					 TupleTableSlot *newslot,
 					 List *recheckIndexes,
 					 TransitionCaptureState *transition_capture,
@@ -3110,29 +3100,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 		 * separately for DELETE and INSERT to capture transition table rows.
 		 * In such case, either old tuple or new tuple can be NULL.
 		 */
-		TupleTableSlot *oldslot;
-		ResultRelInfo *tupsrc;
-
 		Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
 			   !is_crosspart_update);
 
-		tupsrc = src_partinfo ? src_partinfo : relinfo;
-		oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
-
-		if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
-			GetTupleForTrigger(estate,
-							   NULL,
-							   tupsrc,
-							   tupleid,
-							   LockTupleExclusive,
-							   oldslot,
-							   NULL,
-							   NULL,
-							   NULL);
-		else if (fdw_trigtuple != NULL)
+		if (fdw_trigtuple != NULL)
+		{
+			Assert(oldslot);
 			ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
-		else
-			ExecClearTuple(oldslot);
+		}
 
 		AfterTriggerSaveEvent(estate, relinfo,
 							  src_partinfo, dst_partinfo,
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 9dd71684615..31ff5cd9f41 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -545,6 +545,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 	{
 		List	   *recheckIndexes = NIL;
 		TU_UpdateIndexes update_indexes;
+		TupleTableSlot *oldSlot = NULL;
 
 		/* Compute stored generated columns */
 		if (rel->rd_att->constr &&
@@ -558,8 +559,12 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 		if (rel->rd_rel->relispartition)
 			ExecPartitionCheck(resultRelInfo, slot, estate, true);
 
+		if (resultRelInfo->ri_TrigDesc &&
+			resultRelInfo->ri_TrigDesc->trig_update_after_row)
+			oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
 		simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
-								  &update_indexes);
+								  &update_indexes, oldSlot);
 
 		if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
 			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
@@ -570,7 +575,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 		/* AFTER ROW UPDATE Triggers */
 		ExecARUpdateTriggers(estate, resultRelInfo,
 							 NULL, NULL,
-							 tid, NULL, slot,
+							 NULL, oldSlot, slot,
 							 recheckIndexes, NULL, false);
 
 		list_free(recheckIndexes);
@@ -604,12 +609,18 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
 
 	if (!skip_tuple)
 	{
+		TupleTableSlot *oldSlot = NULL;
+
+		if (resultRelInfo->ri_TrigDesc &&
+			resultRelInfo->ri_TrigDesc->trig_delete_after_row)
+			oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
 		/* OK, delete the tuple */
-		simple_table_tuple_delete(rel, tid, estate->es_snapshot);
+		simple_table_tuple_delete(rel, tid, estate->es_snapshot, oldSlot);
 
 		/* AFTER ROW DELETE Triggers */
 		ExecARDeleteTriggers(estate, resultRelInfo,
-							 tid, NULL, NULL, false);
+							 NULL, oldSlot, NULL, false);
 	}
 }
 
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index e3503756818..de49e48bae6 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -111,7 +111,7 @@ typedef struct UpdateContext
 {
 	bool		updated;		/* did UPDATE actually occur? */
 	bool		crossPartUpdate;	/* was it a cross-partition update? */
-	TU_UpdateIndexes updateIndexes;	/* Which index updates are required? */
+	TU_UpdateIndexes updateIndexes; /* Which index updates are required? */
 
 	/*
 	 * Lock mode to acquire on the latest tuple version before performing
@@ -133,7 +133,7 @@ static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
 											   ResultRelInfo *sourcePartInfo,
 											   ResultRelInfo *destPartInfo,
 											   ItemPointer tupleid,
-											   TupleTableSlot *oldslot,
+											   TupleTableSlot *oldSlot,
 											   TupleTableSlot *newslot);
 static bool ExecOnConflictUpdate(ModifyTableContext *context,
 								 ResultRelInfo *resultRelInfo,
@@ -570,6 +570,10 @@ ExecInitInsertProjection(ModifyTableState *mtstate,
 	resultRelInfo->ri_newTupleSlot =
 		table_slot_create(resultRelInfo->ri_RelationDesc,
 						  &estate->es_tupleTable);
+	if (node->onConflictAction == ONCONFLICT_UPDATE)
+		resultRelInfo->ri_oldTupleSlot =
+			table_slot_create(resultRelInfo->ri_RelationDesc,
+							  &estate->es_tupleTable);
 
 	/* Build ProjectionInfo if needed (it probably isn't). */
 	if (need_projection)
@@ -1160,7 +1164,7 @@ ExecInsert(ModifyTableContext *context,
 		ExecARUpdateTriggers(estate, resultRelInfo,
 							 NULL, NULL,
 							 NULL,
-							 NULL,
+							 resultRelInfo->ri_oldTupleSlot,
 							 slot,
 							 NULL,
 							 mtstate->mt_transition_capture,
@@ -1324,62 +1328,28 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	return true;
 }
 
-/*
- * The implementation for LazyTupleTableSlot wrapper for EPQ slot to be passed
- * to table_tuple_update()/table_tuple_delete().
- */
-typedef struct
-{
-	EPQState   *epqstate;
-	ResultRelInfo *resultRelInfo;
-} GetEPQSlotArg;
-
-static TupleTableSlot *
-GetEPQSlot(void *arg)
-{
-	GetEPQSlotArg *slotArg = (GetEPQSlotArg *) arg;
-
-	return EvalPlanQualSlot(slotArg->epqstate,
-							slotArg->resultRelInfo->ri_RelationDesc,
-							slotArg->resultRelInfo->ri_RangeTableIndex);
-}
-
 /*
  * ExecDeleteAct -- subroutine for ExecDelete
  *
  * Actually delete the tuple from a plain table.
  *
- * If the 'lockUpdated' flag is set and the target tuple is updated, then
- * the latest version gets locked and fetched into the EPQ slot.
- *
  * Caller is in charge of doing EvalPlanQual as necessary
  */
 static TM_Result
 ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-			  ItemPointer tupleid, bool changingPart, bool lockUpdated)
+			  ItemPointer tupleid, bool changingPart, int options,
+			  TupleTableSlot *oldSlot)
 {
 	EState	   *estate = context->estate;
-	GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
-	LazyTupleTableSlot lazyEPQSlot,
-			   *lazyEPQSlotPtr;
 
-	if (lockUpdated)
-	{
-		MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
-		lazyEPQSlotPtr = &lazyEPQSlot;
-	}
-	else
-	{
-		lazyEPQSlotPtr = NULL;
-	}
 	return table_tuple_delete(resultRelInfo->ri_RelationDesc, tupleid,
 							  estate->es_output_cid,
 							  estate->es_snapshot,
 							  estate->es_crosscheck_snapshot,
-							  true /* wait for commit */ ,
+							  options /* wait for commit */ ,
 							  &context->tmfd,
 							  changingPart,
-							  lazyEPQSlotPtr);
+							  oldSlot);
 }
 
 /*
@@ -1391,7 +1361,8 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
  */
 static void
 ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-				   ItemPointer tupleid, HeapTuple oldtuple, bool changingPart)
+				   ItemPointer tupleid, HeapTuple oldtuple,
+				   TupleTableSlot *slot, bool changingPart)
 {
 	ModifyTableState *mtstate = context->mtstate;
 	EState	   *estate = context->estate;
@@ -1409,8 +1380,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	{
 		ExecARUpdateTriggers(estate, resultRelInfo,
 							 NULL, NULL,
-							 tupleid, oldtuple,
-							 NULL, NULL, mtstate->mt_transition_capture,
+							 oldtuple,
+							 slot, NULL, NULL, mtstate->mt_transition_capture,
 							 false);
 
 		/*
@@ -1421,10 +1392,30 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	}
 
 	/* AFTER ROW DELETE Triggers */
-	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
+	ExecARDeleteTriggers(estate, resultRelInfo, oldtuple, slot,
 						 ar_delete_trig_tcs, changingPart);
 }
 
+/*
+ * Initializes the tuple slot in a ResultRelInfo for DELETE action.
+ *
+ * We mark 'projectNewInfoValid' even though the projections themselves
+ * are not initialized here.
+ */
+static void
+ExecInitDeleteTupleSlot(ModifyTableState *mtstate,
+						ResultRelInfo *resultRelInfo)
+{
+	EState	   *estate = mtstate->ps.state;
+
+	Assert(!resultRelInfo->ri_projectNewInfoValid);
+
+	resultRelInfo->ri_oldTupleSlot =
+		table_slot_create(resultRelInfo->ri_RelationDesc,
+						  &estate->es_tupleTable);
+	resultRelInfo->ri_projectNewInfoValid = true;
+}
+
 /* ----------------------------------------------------------------
  *		ExecDelete
  *
@@ -1452,6 +1443,7 @@ ExecDelete(ModifyTableContext *context,
 		   ResultRelInfo *resultRelInfo,
 		   ItemPointer tupleid,
 		   HeapTuple oldtuple,
+		   TupleTableSlot *oldSlot,
 		   bool processReturning,
 		   bool changingPart,
 		   bool canSetTag,
@@ -1514,6 +1506,11 @@ ExecDelete(ModifyTableContext *context,
 	}
 	else
 	{
+		int			options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
+
+		if (!IsolationUsesXactSnapshot())
+			options |= TABLE_MODIFY_LOCK_UPDATED;
+
 		/*
 		 * delete the tuple
 		 *
@@ -1525,7 +1522,7 @@ ExecDelete(ModifyTableContext *context,
 		 */
 ldelete:
 		result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
-							   !IsolationUsesXactSnapshot());
+							   options, oldSlot);
 
 		switch (result)
 		{
@@ -1569,7 +1566,6 @@ ldelete:
 
 			case TM_Updated:
 				{
-					TupleTableSlot *inputslot;
 					TupleTableSlot *epqslot;
 
 					if (IsolationUsesXactSnapshot())
@@ -1578,34 +1574,14 @@ ldelete:
 								 errmsg("could not serialize access due to concurrent update")));
 
 					/*
-					 * ExecDeleteAct() has already locked the old tuple for
-					 * us. Now we need to copy it to the right slot.
-					 */
-					EvalPlanQualBegin(context->epqstate);
-					inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
-												 resultRelInfo->ri_RangeTableIndex);
-
-					/*
-					 * Save locked table for further processing for RETURNING
-					 * clause.
+					 * We need to do EPQ. The latest tuple is already found
+					 * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
 					 */
-					if (processReturning &&
-						resultRelInfo->ri_projectReturning &&
-						!resultRelInfo->ri_FdwRoutine)
-					{
-						TupleTableSlot *returningSlot;
-
-						returningSlot = ExecGetReturningSlot(estate,
-															 resultRelInfo);
-						ExecCopySlot(returningSlot, inputslot);
-						ExecMaterializeSlot(returningSlot);
-					}
-
 					Assert(context->tmfd.traversed);
 					epqslot = EvalPlanQual(context->epqstate,
 										   resultRelationDesc,
 										   resultRelInfo->ri_RangeTableIndex,
-										   inputslot);
+										   oldSlot);
 					if (TupIsNull(epqslot))
 						/* Tuple not passing quals anymore, exiting... */
 						return NULL;
@@ -1654,7 +1630,8 @@ ldelete:
 	if (tupleDeleted)
 		*tupleDeleted = true;
 
-	ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart);
+	ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple,
+					   oldSlot, changingPart);
 
 	/* Process RETURNING if present and if requested */
 	if (processReturning && resultRelInfo->ri_projectReturning)
@@ -1672,22 +1649,13 @@ ldelete:
 		}
 		else
 		{
-			/*
-			 * Tuple can be already fetched to the returning slot in case
-			 * we've previously locked it.  Fetch the tuple only if the slot
-			 * is empty.
-			 */
+			/* Copy old tuple to the returning slot */
 			slot = ExecGetReturningSlot(estate, resultRelInfo);
 			if (oldtuple != NULL)
-			{
 				ExecForceStoreHeapTuple(oldtuple, slot, false);
-			}
-			else if (TupIsNull(slot))
-			{
-				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
-												   SnapshotAny, slot))
-					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
-			}
+			else
+				ExecCopySlot(slot, oldSlot);
+			Assert(!TupIsNull(slot));
 		}
 
 		rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot);
@@ -1786,12 +1754,16 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
 		MemoryContextSwitchTo(oldcxt);
 	}
 
+	/* Make sure ri_oldTupleSlot is initialized. */
+	if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+		ExecInitUpdateProjection(mtstate, resultRelInfo);
+
 	/*
 	 * Row movement, part 1.  Delete the tuple, but skip RETURNING processing.
 	 * We want to return rows from INSERT.
 	 */
 	ExecDelete(context, resultRelInfo,
-			   tupleid, oldtuple,
+			   tupleid, oldtuple, resultRelInfo->ri_oldTupleSlot,
 			   false,			/* processReturning */
 			   true,			/* changingPart */
 			   false,			/* canSetTag */
@@ -1832,21 +1804,13 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
 			return true;
 		else
 		{
-			/* Fetch the most recent version of old tuple. */
-			TupleTableSlot *oldSlot;
-
-			/* ... but first, make sure ri_oldTupleSlot is initialized. */
-			if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
-				ExecInitUpdateProjection(mtstate, resultRelInfo);
-			oldSlot = resultRelInfo->ri_oldTupleSlot;
-			if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
-											   tupleid,
-											   SnapshotAny,
-											   oldSlot))
-				elog(ERROR, "failed to fetch tuple being updated");
-			/* and project the new tuple to retry the UPDATE with */
+			/*
+			 * ExecDelete already fetches the most recent version of old tuple
+			 * to resultRelInfo->ri_RelationDesc.  So, just project the new
+			 * tuple to retry the UPDATE with.
+			 */
 			*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
-												oldSlot);
+												resultRelInfo->ri_oldTupleSlot);
 			return false;
 		}
 	}
@@ -1965,15 +1929,13 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
 static TM_Result
 ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 			  ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
-			  bool canSetTag, bool lockUpdated, UpdateContext *updateCxt)
+			  bool canSetTag, int options, TupleTableSlot *oldSlot,
+			  UpdateContext *updateCxt)
 {
 	EState	   *estate = context->estate;
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
 	bool		partition_constraint_failed;
 	TM_Result	result;
-	GetEPQSlotArg slotArg = {context->epqstate, resultRelInfo};
-	LazyTupleTableSlot lazyEPQSlot,
-			   *lazyEPQSlotPtr;
 
 	updateCxt->crossPartUpdate = false;
 
@@ -2060,7 +2022,8 @@ lreplace:
 				ExecCrossPartitionUpdateForeignKey(context,
 												   resultRelInfo,
 												   insert_destrel,
-												   tupleid, slot,
+												   tupleid,
+												   resultRelInfo->ri_oldTupleSlot,
 												   inserted_tuple);
 
 			return TM_Ok;
@@ -2099,23 +2062,14 @@ lreplace:
 	 * for referential integrity updates in transaction-snapshot mode
 	 * transactions.
 	 */
-	if (lockUpdated)
-	{
-		MAKE_LAZY_TTS(&lazyEPQSlot, GetEPQSlot, &slotArg);
-		lazyEPQSlotPtr = &lazyEPQSlot;
-	}
-	else
-	{
-		lazyEPQSlotPtr = NULL;
-	}
 	result = table_tuple_update(resultRelationDesc, tupleid, slot,
 								estate->es_output_cid,
 								estate->es_snapshot,
 								estate->es_crosscheck_snapshot,
-								true /* wait for commit */ ,
+								options /* wait for commit */ ,
 								&context->tmfd, &updateCxt->lockmode,
 								&updateCxt->updateIndexes,
-								lazyEPQSlotPtr);
+								oldSlot);
 	if (result == TM_Ok)
 		updateCxt->updated = true;
 
@@ -2131,7 +2085,8 @@ lreplace:
 static void
 ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
 				   ResultRelInfo *resultRelInfo, ItemPointer tupleid,
-				   HeapTuple oldtuple, TupleTableSlot *slot)
+				   HeapTuple oldtuple, TupleTableSlot *slot,
+				   TupleTableSlot *oldSlot)
 {
 	ModifyTableState *mtstate = context->mtstate;
 	List	   *recheckIndexes = NIL;
@@ -2147,7 +2102,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
 	/* AFTER ROW UPDATE Triggers */
 	ExecARUpdateTriggers(context->estate, resultRelInfo,
 						 NULL, NULL,
-						 tupleid, oldtuple, slot,
+						 oldtuple, oldSlot, slot,
 						 recheckIndexes,
 						 mtstate->operation == CMD_INSERT ?
 						 mtstate->mt_oc_transition_capture :
@@ -2236,7 +2191,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
 	/* Perform the root table's triggers. */
 	ExecARUpdateTriggers(context->estate,
 						 rootRelInfo, sourcePartInfo, destPartInfo,
-						 tupleid, NULL, newslot, NIL, NULL, true);
+						 NULL, oldslot, newslot, NIL, NULL, true);
 }
 
 /* ----------------------------------------------------------------
@@ -2259,6 +2214,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
  *		no relevant triggers.
  *
  *		slot contains the new tuple value to be stored.
+ *		oldSlot is the slot to store the old tuple.
  *		planSlot is the output of the ModifyTable's subplan; we use it
  *		to access values from other input tables (for RETURNING),
  *		row-ID junk columns, etc.
@@ -2269,7 +2225,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
 static TupleTableSlot *
 ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 		   ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
-		   bool canSetTag, bool locked)
+		   TupleTableSlot *oldSlot, bool canSetTag, bool locked)
 {
 	EState	   *estate = context->estate;
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2322,6 +2278,11 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	}
 	else
 	{
+		int			options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
+
+		if (!locked && !IsolationUsesXactSnapshot())
+			options |= TABLE_MODIFY_LOCK_UPDATED;
+
 		/*
 		 * If we generate a new candidate tuple after EvalPlanQual testing, we
 		 * must loop back here to try again.  (We don't need to redo triggers,
@@ -2331,8 +2292,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 		 */
 redo_act:
 		result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
-							   canSetTag, !IsolationUsesXactSnapshot(),
-							   &updateCxt);
+							   canSetTag, options, oldSlot, &updateCxt);
 
 		/*
 		 * If ExecUpdateAct reports that a cross-partition update was done,
@@ -2383,9 +2343,7 @@ redo_act:
 
 			case TM_Updated:
 				{
-					TupleTableSlot *inputslot;
 					TupleTableSlot *epqslot;
-					TupleTableSlot *oldSlot;
 
 					if (IsolationUsesXactSnapshot())
 						ereport(ERROR,
@@ -2394,35 +2352,20 @@ redo_act:
 					Assert(!locked);
 
 					/*
-					 * ExecUpdateAct() has already locked the old tuple for
-					 * us. Now we need to copy it to the right slot.
+					 * We need to do EPQ. The latest tuple is already found
+					 * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
 					 */
-					inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
-												 resultRelInfo->ri_RangeTableIndex);
-
-					/* Make sure ri_oldTupleSlot is initialized. */
-					if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
-						ExecInitUpdateProjection(context->mtstate,
-												 resultRelInfo);
-
-					/*
-					 * Save the locked tuple for further calculation of the
-					 * new tuple.
-					 */
-					oldSlot = resultRelInfo->ri_oldTupleSlot;
-					ExecCopySlot(oldSlot, inputslot);
-					ExecMaterializeSlot(oldSlot);
 					Assert(context->tmfd.traversed);
-
 					epqslot = EvalPlanQual(context->epqstate,
 										   resultRelationDesc,
 										   resultRelInfo->ri_RangeTableIndex,
-										   inputslot);
+										   oldSlot);
 					if (TupIsNull(epqslot))
 						/* Tuple not passing quals anymore, exiting... */
 						return NULL;
 					slot = ExecGetUpdateNewTuple(resultRelInfo,
-												 epqslot, oldSlot);
+												 epqslot,
+												 oldSlot);
 					goto redo_act;
 				}
 
@@ -2447,7 +2390,7 @@ redo_act:
 		(estate->es_processed)++;
 
 	ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple,
-					   slot);
+					   slot, oldSlot);
 
 	/* Process RETURNING if present */
 	if (resultRelInfo->ri_projectReturning)
@@ -2665,6 +2608,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
 	*returning = ExecUpdate(context, resultRelInfo,
 							conflictTid, NULL,
 							resultRelInfo->ri_onConflict->oc_ProjSlot,
+							existing,
 							canSetTag, true);
 
 	/*
@@ -2803,7 +2747,6 @@ lmerge_matched:
 	 * EvalPlanQual returns us a new tuple, which may not be visible to our
 	 * MVCC snapshot.
 	 */
-
 	if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
 									   tupleid,
 									   SnapshotAny,
@@ -2868,11 +2811,13 @@ lmerge_matched:
 					break;		/* concurrent update/delete */
 				}
 				result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL,
-									   newslot, false, false, &updateCxt);
+									   newslot, false, TABLE_MODIFY_WAIT, NULL,
+									   &updateCxt);
 				if (result == TM_Ok && updateCxt.updated)
 				{
 					ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
-									   tupleid, NULL, newslot);
+									   tupleid, NULL, newslot,
+									   resultRelInfo->ri_oldTupleSlot);
 					mtstate->mt_merge_updated += 1;
 				}
 				break;
@@ -2887,11 +2832,11 @@ lmerge_matched:
 					break;		/* concurrent update/delete */
 				}
 				result = ExecDeleteAct(context, resultRelInfo, tupleid,
-									   false, false);
+									   false, TABLE_MODIFY_WAIT, NULL);
 				if (result == TM_Ok)
 				{
 					ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
-									   false);
+									   resultRelInfo->ri_oldTupleSlot, false);
 					mtstate->mt_merge_deleted += 1;
 				}
 				break;
@@ -3793,12 +3738,18 @@ ExecModifyTable(PlanState *pstate)
 
 				/* Now apply the update. */
 				slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
-								  slot, node->canSetTag, false);
+								  slot, resultRelInfo->ri_oldTupleSlot,
+								  node->canSetTag, false);
 				break;
 
 			case CMD_DELETE:
+				/* Initialize slot for DELETE to fetch the old tuple */
+				if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+					ExecInitDeleteTupleSlot(node, resultRelInfo);
+
 				slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple,
-								  true, false, node->canSetTag, NULL, NULL);
+								  resultRelInfo->ri_oldTupleSlot, true, false,
+								  node->canSetTag, NULL, NULL);
 				break;
 
 			case CMD_MERGE:
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index faf50265191..19bcbfeea23 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -242,19 +242,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
 							  int ntuples, CommandId cid, int options,
 							  BulkInsertState bistate);
 extern TM_Result heap_delete(Relation relation, ItemPointer tid,
-							 CommandId cid, Snapshot crosscheck, bool wait,
-							 struct TM_FailureData *tmfd, bool changingPart);
+							 CommandId cid, Snapshot crosscheck, int options,
+							 struct TM_FailureData *tmfd, bool changingPart,
+							 TupleTableSlot *oldSlot);
 extern void heap_finish_speculative(Relation relation, ItemPointer tid);
 extern void heap_abort_speculative(Relation relation, ItemPointer tid);
 extern TM_Result heap_update(Relation relation, ItemPointer otid,
 							 HeapTuple newtup,
-							 CommandId cid, Snapshot crosscheck, bool wait,
+							 CommandId cid, Snapshot crosscheck, int options,
 							 struct TM_FailureData *tmfd, LockTupleMode *lockmode,
-							 TU_UpdateIndexes *update_indexes);
-extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
-								 CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
-								 bool follow_updates,
-								 Buffer *buffer, struct TM_FailureData *tmfd);
+							 TU_UpdateIndexes *update_indexes,
+							 TupleTableSlot *oldSlot);
+extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid,
+								 TupleTableSlot *slot,
+								 CommandId cid, LockTupleMode mode,
+								 LockWaitPolicy wait_policy, bool follow_updates,
+								 struct TM_FailureData *tmfd);
 
 extern void heap_inplace_update(Relation relation, HeapTuple tuple);
 extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 7159365e652..01c2a95d890 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -259,6 +259,11 @@ typedef struct TM_IndexDeleteOp
 /* Follow update chain and lock latest version of tuple */
 #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION		(1 << 1)
 
+/* "options" flag bits for table_tuple_update and table_tuple_delete */
+#define TABLE_MODIFY_WAIT			0x0001
+#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002
+#define TABLE_MODIFY_LOCK_UPDATED	0x0004
+
 
 /* Typedef for callback function for table_index_build_scan */
 typedef void (*IndexBuildCallback) (Relation index,
@@ -528,10 +533,10 @@ typedef struct TableAmRoutine
 								 CommandId cid,
 								 Snapshot snapshot,
 								 Snapshot crosscheck,
-								 bool wait,
+								 int options,
 								 TM_FailureData *tmfd,
 								 bool changingPart,
-								 LazyTupleTableSlot *lockedSlot);
+								 TupleTableSlot *oldSlot);
 
 	/* see table_tuple_update() for reference about parameters */
 	TM_Result	(*tuple_update) (Relation rel,
@@ -540,11 +545,11 @@ typedef struct TableAmRoutine
 								 CommandId cid,
 								 Snapshot snapshot,
 								 Snapshot crosscheck,
-								 bool wait,
+								 int options,
 								 TM_FailureData *tmfd,
 								 LockTupleMode *lockmode,
 								 TU_UpdateIndexes *update_indexes,
-								 LazyTupleTableSlot *lockedSlot);
+								 TupleTableSlot *oldSlot);
 
 	/* see table_tuple_lock() for reference about parameters */
 	TM_Result	(*tuple_lock) (Relation rel,
@@ -1459,7 +1464,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
 }
 
 /*
- * Delete a tuple (or lock last tuple version if lockedSlot is given).
+ * Delete a tuple (and optionally lock the last tuple version).
  *
  * NB: do not call this directly unless prepared to deal with
  * concurrent-update conditions.  Use simple_table_tuple_delete instead.
@@ -1470,13 +1475,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
  *	cid - delete command ID (used for visibility test, and stored into
  *		cmax if successful)
  *	crosscheck - if not InvalidSnapshot, also check tuple against this
- *	wait - true if should wait for any conflicting update to commit/abort
+ *	options:
+ *		If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
+ *		If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
+ *		fetched into oldSlot when the update is successful.
+ *		If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
+ *		concurrently updated, then the last tuple version is locked and fetched
+ *		into oldSlot.
+ *
  * Output parameters:
  *	tmfd - filled in failure cases (see below)
  *	changingPart - true iff the tuple is being moved to another partition
  *		table due to an update of the partition key. Otherwise, false.
- *	lockedSlot - lazy slot to save the locked tuple if should lock the last
- *		row version during the concurrent update. NULL if not needed.
+ *	oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
+ *		TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
+ *		is specified.
  *
  * Normal, successful return value is TM_Ok, which means we did actually
  * delete it.  Failure return codes are TM_SelfModified, TM_Updated, and
@@ -1488,18 +1501,18 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
  */
 static inline TM_Result
 table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
-				   Snapshot snapshot, Snapshot crosscheck, bool wait,
+				   Snapshot snapshot, Snapshot crosscheck, int options,
 				   TM_FailureData *tmfd, bool changingPart,
-				   LazyTupleTableSlot *lockedSlot)
+				   TupleTableSlot *oldSlot)
 {
 	return rel->rd_tableam->tuple_delete(rel, tid, cid,
 										 snapshot, crosscheck,
-										 wait, tmfd, changingPart,
-										 lockedSlot);
+										 options, tmfd, changingPart,
+										 oldSlot);
 }
 
 /*
- * Update a tuple (or lock last tuple version if lockedSlot is given).
+ * Update a tuple (and optionally lock the last tuple version).
  *
  * NB: do not call this directly unless you are prepared to deal with
  * concurrent-update conditions.  Use simple_table_tuple_update instead.
@@ -1511,14 +1524,22 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
  *	cid - update command ID (used for visibility test, and stored into
  *		cmax/cmin if successful)
  *	crosscheck - if not InvalidSnapshot, also check old tuple against this
- *	wait - true if should wait for any conflicting update to commit/abort
+ *	options:
+ *		If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
+ *		If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
+ *		fetched into oldSlot when the update is successful.
+ *		If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
+ *		concurrently updated, then the last tuple version is locked and fetched
+ *		into oldSlot.
+ *
  * Output parameters:
  *	tmfd - filled in failure cases (see below)
  *	lockmode - filled with lock mode acquired on tuple
  *  update_indexes - in success cases this is set to true if new index entries
  *		are required for this tuple
- * 	lockedSlot - lazy slot to save the locked tuple if should lock the last
- *		row version during the concurrent update. NULL if not needed.
+ *	oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
+ *		TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
+ *		is specified.
 
  * Normal, successful return value is TM_Ok, which means we did actually
  * update it.  Failure return codes are TM_SelfModified, TM_Updated, and
@@ -1537,15 +1558,15 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
 static inline TM_Result
 table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
 				   CommandId cid, Snapshot snapshot, Snapshot crosscheck,
-				   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
+				   int options, TM_FailureData *tmfd, LockTupleMode *lockmode,
 				   TU_UpdateIndexes *update_indexes,
-				   LazyTupleTableSlot *lockedSlot)
+				   TupleTableSlot *oldSlot)
 {
 	return rel->rd_tableam->tuple_update(rel, otid, slot,
 										 cid, snapshot, crosscheck,
-										 wait, tmfd,
+										 options, tmfd,
 										 lockmode, update_indexes,
-										 lockedSlot);
+										 oldSlot);
 }
 
 /*
@@ -2061,10 +2082,12 @@ table_scan_sample_next_tuple(TableScanDesc scan,
 
 extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
 extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
-									  Snapshot snapshot);
+									  Snapshot snapshot,
+									  TupleTableSlot *oldSlot);
 extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
 									  TupleTableSlot *slot, Snapshot snapshot,
-									  TU_UpdateIndexes *update_indexes);
+									  TU_UpdateIndexes *update_indexes,
+									  TupleTableSlot *oldSlot);
 
 
 /* ----------------------------------------------------------------------------
diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index 430e3ca7ddf..4903b4b7bc2 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h
@@ -216,8 +216,8 @@ extern bool ExecBRDeleteTriggers(EState *estate,
 								 TM_FailureData *tmfd);
 extern void ExecARDeleteTriggers(EState *estate,
 								 ResultRelInfo *relinfo,
-								 ItemPointer tupleid,
 								 HeapTuple fdw_trigtuple,
+								 TupleTableSlot *slot,
 								 TransitionCaptureState *transition_capture,
 								 bool is_crosspart_update);
 extern bool ExecIRDeleteTriggers(EState *estate,
@@ -240,8 +240,8 @@ extern void ExecARUpdateTriggers(EState *estate,
 								 ResultRelInfo *relinfo,
 								 ResultRelInfo *src_partinfo,
 								 ResultRelInfo *dst_partinfo,
-								 ItemPointer tupleid,
 								 HeapTuple fdw_trigtuple,
+								 TupleTableSlot *oldslot,
 								 TupleTableSlot *newslot,
 								 List *recheckIndexes,
 								 TransitionCaptureState *transition_capture,
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index 2e13ecc3ffe..ff64b7cb98f 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -300,44 +300,6 @@ typedef struct MinimalTupleTableSlot
 #define TupIsNull(slot) \
 	((slot) == NULL || TTS_EMPTY(slot))
 
-/*----------
- * LazyTupleTableSlot -- a lazy version of TupleTableSlot.
- *
- * Sometimes caller might need to pass to the function a slot, which most
- * likely will reain undemanded.  Preallocating such slot would be a waste of
- * resources in the  majority of cases.  Lazy slot is aimed to resolve this
- * problem.  It is basically a promise to allocate the slot once it's needed.
- * Once callee needs the slot, it could get it using LAZY_TTS_EVAL(lazySlot)
- * macro.
- */
-typedef struct
-{
-	TupleTableSlot *slot;		/* cached slot or NULL if not yet allocated */
-	TupleTableSlot *(*getSlot) (void *arg); /* callback for slot allocation */
-	void	   *getSlotArg;		/* argument for the callback above */
-} LazyTupleTableSlot;
-
-/*
- * A constructor for the lazy slot.
- */
-#define MAKE_LAZY_TTS(lazySlot, callback, arg) \
-	do { \
-		(lazySlot)->slot = NULL; \
-		(lazySlot)->getSlot = callback; \
-		(lazySlot)->getSlotArg = arg; \
-	} while (false)
-
-/*
- * Macro for lazy slot evaluation.  NULL lazy slot evaluates to NULL slot.
- * Cached version is used if present.  Use the callback otherwise.
- */
-#define LAZY_TTS_EVAL(lazySlot) \
-	((lazySlot) ? \
-		((lazySlot)->slot ? \
-			(lazySlot)->slot : \
-			((lazySlot)->slot = (lazySlot)->getSlot((lazySlot)->getSlotArg))) : \
-		NULL)
-
 /* in executor/execTuples.c */
 extern TupleTableSlot *MakeTupleTableSlot(TupleDesc tupleDesc,
 										  const TupleTableSlotOps *tts_ops);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b97174d1607..5c0410869f7 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -956,7 +956,6 @@ GenerationPointer
 GenericCosts
 GenericXLogState
 GeqoPrivateData
-GetEPQSlotArg
 GetForeignJoinPaths_function
 GetForeignModifyBatchSize_function
 GetForeignPaths_function
@@ -1402,7 +1401,6 @@ LagTracker
 LargeObjectDesc
 LastAttnumInfo
 Latch
-LazyTupleTableSlot
 LerpFunc
 LexDescr
 LexemeEntry
-- 
2.37.1 (Apple Git-137.1)