From 0c4d098c981e297857f527683e8aed4d07098689 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Sat, 21 Feb 2026 11:08:08 +0200
Subject: [PATCH v1] Rework ginScanToDelete() to pass Buffers instead of
 BlockNumbers.

Previously, ginScanToDelete() and ginDeletePage() passed BlockNumbers and
re-read pages that were already pinned and locked during the tree walk.  The
caller ginVacuumPostingTree()) held a cleanup-locked root buffer, yet
ginScanToDelete() re-read it by block number with special-case code to skip
re-locking.

Rework both functions to pass Buffers directly.  DataPageDeleteStack now
carries buffer, myoff (downlink offset in parent), and isRoot per level,
so ginScanToDelete() takes only GinVacuumState and DataPageDeleteStack
pointers.  Also, ginDeletePage() receives the three Buffers directly, and
no longer reads or releases them itself.  The caller reads and locks child
pages before recursing, and manages buffer lifecycle afterward.

This eliminates the confusing isRoot special cases in buffer management,
including the apparent (but unreachable) double release of the root
buffer identified by Andres Freund.

Add comments explaining the locking protocol and the DataPageDeleteStack
structure.

Reported-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/utrlxij43fbguzw4kldte2spc4btoldizutcqyrfakqnbrp3ir@ph3sphpj4asz
---
 src/backend/access/gin/ginvacuum.c | 188 +++++++++++++++++------------
 1 file changed, 111 insertions(+), 77 deletions(-)

diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index c9f143f6c31..28002ac97f6 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -110,31 +110,52 @@ xlogVacuumPage(Relation index, Buffer buffer)
 }
 
 
+/*
+ * Stack entry used during posting tree empty-page deletion scan.
+ *
+ * One DataPageDeleteStack entry is allocated per tree level.  As
+ * ginScanToDelete() recurses down the tree, each entry tracks the buffer
+ * of the page currently being visited at that level ('buffer'), and the
+ * rightmost non-deleted page to its left ('leftBuffer').  The left page
+ * is kept pinned and exclusively locked because ginDeletePage() needs it
+ * to update the sibling chain; acquiring it later could deadlock with
+ * ginStepRight(), which locks pages left-to-right (see commit e14641197a5).
+ */
 typedef struct DataPageDeleteStack
 {
 	struct DataPageDeleteStack *child;
 	struct DataPageDeleteStack *parent;
 
-	BlockNumber blkno;			/* current block number */
-	Buffer		leftBuffer;		/* pinned and locked rightest non-deleted page
-								 * on left */
+	Buffer		buffer;			/* buffer for the page being visited at this
+								 * tree level; valid only while recursing
+								 * into children */
+	Buffer		leftBuffer;		/* pinned and locked rightmost non-deleted
+								 * sibling to the left of the current page */
+	OffsetNumber myoff;			/* offset of this page's downlink in the
+								 * parent */
 	bool		isRoot;
 } DataPageDeleteStack;
 
 
 /*
  * Delete a posting tree page.
+ *
+ * Removes the page identified by dBuffer from the posting tree by updating
+ * the left sibling's rightlink (in lBuffer) to skip over the deleted page,
+ * and removing the downlink from the parent page (in pBuffer).  All three
+ * buffers must already be pinned and exclusively locked by the caller.
+ *
+ * The buffers are NOT released nor unlocked here; the caller is responsible
+ * for this.
  */
 static void
-ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno,
-			  BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot)
+ginDeletePage(GinVacuumState *gvs, Buffer dBuffer, Buffer lBuffer,
+			  Buffer pBuffer, OffsetNumber myoff, bool isParentRoot)
 {
-	Buffer		dBuffer;
-	Buffer		lBuffer;
-	Buffer		pBuffer;
 	Page		page,
 				parentPage;
 	BlockNumber rightlink;
+	BlockNumber deleteBlkno = BufferGetBlockNumber(dBuffer);
 
 	/*
 	 * This function MUST be called only if someone of parent pages hold
@@ -142,12 +163,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
 	 * happen in this subtree. Caller also acquires Exclusive locks on
 	 * deletable, parent and left pages.
 	 */
-	lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno,
-								 RBM_NORMAL, gvs->strategy);
-	dBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, deleteBlkno,
-								 RBM_NORMAL, gvs->strategy);
-	pBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, parentBlkno,
-								 RBM_NORMAL, gvs->strategy);
 
 	page = BufferGetPage(dBuffer);
 	rightlink = GinPageGetOpaque(page)->rightlink;
@@ -226,55 +241,37 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
 
 	END_CRIT_SECTION();
 
-	ReleaseBuffer(pBuffer);
-	ReleaseBuffer(lBuffer);
-	ReleaseBuffer(dBuffer);
-
 	gvs->result->pages_newly_deleted++;
 	gvs->result->pages_deleted++;
 }
 
 
 /*
- * Scans posting tree and deletes empty pages.  Caller must lock root page for
- * cleanup.  During scan path from root to current page is kept exclusively
- * locked.  Also keep left page exclusively locked, because ginDeletePage()
- * needs it.  If we try to relock left page later, it could deadlock with
- * ginStepRight().
+ * Scans a posting tree and deletes empty pages.
+ *
+ * The caller must hold a cleanup lock on the root page to prevent concurrent
+ * inserts.  The entire path from the root down to the current page is kept
+ * exclusively locked throughout the scan.  The left sibling at each level is
+ * also kept locked, because ginDeletePage() needs it to update the rightlink
+ * of the left sibling; re-acquiring the left sibling lock later could
+ * deadlock with ginStepRight(), which acquires page locks left-to-right.
+ *
+ * All per-level state is carried in 'myStackItem': the buffer to process
+ * (must already be pinned and exclusively locked), the left sibling buffer,
+ * and this page's offset in the parent's downlink array.  The root entry is
+ * set up by ginVacuumPostingTree(); child entries are populated here before
+ * recursing.
+ *
+ * Returns true if the page was deleted, false otherwise.
  */
 static bool
-ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
-				DataPageDeleteStack *parent, OffsetNumber myoff)
+ginScanToDelete(GinVacuumState *gvs, DataPageDeleteStack *myStackItem)
 {
-	DataPageDeleteStack *me;
-	Buffer		buffer;
+	Buffer		buffer = myStackItem->buffer;
 	Page		page;
-	bool		meDelete = false;
+	bool		pageWasDeleted = false;
 	bool		isempty;
 
-	if (isRoot)
-	{
-		me = parent;
-	}
-	else
-	{
-		if (!parent->child)
-		{
-			me = palloc0_object(DataPageDeleteStack);
-			me->parent = parent;
-			parent->child = me;
-			me->leftBuffer = InvalidBuffer;
-		}
-		else
-			me = parent->child;
-	}
-
-	buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
-								RBM_NORMAL, gvs->strategy);
-
-	if (!isRoot)
-		LockBuffer(buffer, GIN_EXCLUSIVE);
-
 	page = BufferGetPage(buffer);
 
 	Assert(GinPageIsData(page));
@@ -283,19 +280,48 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
 	{
 		OffsetNumber i;
 
-		me->blkno = blkno;
-		for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
+		for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff;)
 		{
 			PostingItem *pitem = GinDataPageGetPostingItem(page, i);
+			Buffer		childBuffer;
+
+			childBuffer = ReadBufferExtended(gvs->index,
+											 MAIN_FORKNUM,
+											 PostingItemGetBlockNumber(pitem),
+											 RBM_NORMAL, gvs->strategy);
+			LockBuffer(childBuffer, GIN_EXCLUSIVE);
+
+			/* Allocate a child stack entry on first use; reuse thereafter */
+			if (!myStackItem->child)
+			{
+				myStackItem->child = palloc0_object(DataPageDeleteStack);
+				myStackItem->child->parent = myStackItem;
+				myStackItem->child->leftBuffer = InvalidBuffer;
+			}
 
-			if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), false, me, i))
-				i--;
+			myStackItem->child->buffer = childBuffer;
+			myStackItem->child->isRoot = false;
+			myStackItem->child->myoff = i;
+
+			/*
+			 * Recurse into child.  If the child page was deleted, its
+			 * downlink was removed from our page, so re-examine the same
+			 * offset; otherwise advance to the next downlink.
+			 */
+			if (!ginScanToDelete(gvs, myStackItem->child))
+				i++;
 		}
+		myStackItem->buffer = InvalidBuffer;
 
-		if (GinPageRightMost(page) && BufferIsValid(me->child->leftBuffer))
+		/*
+		 * After processing all children at this level, release the child
+		 * level's leftBuffer if we're at the rightmost page.  There is no
+		 * right sibling that could need it for deletion.
+		 */
+		if (GinPageRightMost(page) && BufferIsValid(myStackItem->child->leftBuffer))
 		{
-			UnlockReleaseBuffer(me->child->leftBuffer);
-			me->child->leftBuffer = InvalidBuffer;
+			UnlockReleaseBuffer(myStackItem->child->leftBuffer);
+			myStackItem->child->leftBuffer = InvalidBuffer;
 		}
 	}
 
@@ -306,34 +332,40 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
 
 	if (isempty)
 	{
-		/* we never delete the left- or rightmost branch */
-		if (BufferIsValid(me->leftBuffer) && !GinPageRightMost(page))
+		/*
+		 * Proceed to the ginDeletePage() if that's not the leftmost or
+		 * the rightmost page.
+		 */
+		if (BufferIsValid(myStackItem->leftBuffer) && !GinPageRightMost(page))
 		{
-			Assert(!isRoot);
-			ginDeletePage(gvs, blkno, BufferGetBlockNumber(me->leftBuffer),
-						  me->parent->blkno, myoff, me->parent->isRoot);
-			meDelete = true;
+			Assert(!myStackItem->isRoot);
+			ginDeletePage(gvs, buffer, myStackItem->leftBuffer,
+						  myStackItem->parent->buffer, myStackItem->myoff,
+						  myStackItem->parent->isRoot);
+			pageWasDeleted = true;
 		}
 	}
 
-	if (!meDelete)
+	if (!pageWasDeleted)
 	{
-		if (BufferIsValid(me->leftBuffer))
-			UnlockReleaseBuffer(me->leftBuffer);
-		me->leftBuffer = buffer;
+		/*
+		 * Keep this page as the new leftBuffer for this level: the next
+		 * sibling to the right might need it for deletion.  Release any
+		 * previously held left page first.
+		 */
+		if (BufferIsValid(myStackItem->leftBuffer))
+			UnlockReleaseBuffer(myStackItem->leftBuffer);
+		myStackItem->leftBuffer = buffer;
 	}
 	else
 	{
-		if (!isRoot)
-			LockBuffer(buffer, GIN_UNLOCK);
-
-		ReleaseBuffer(buffer);
+		/*
+		 * Page was deleted; release the buffer.  leftBuffer remains the same.
+		 */
+		UnlockReleaseBuffer(buffer);
 	}
 
-	if (isRoot)
-		ReleaseBuffer(buffer);
-
-	return meDelete;
+	return pageWasDeleted;
 }
 
 
@@ -428,10 +460,12 @@ ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
 		LockBufferForCleanup(buffer);
 
 		memset(&root, 0, sizeof(DataPageDeleteStack));
+		root.buffer = buffer;
 		root.leftBuffer = InvalidBuffer;
+		root.myoff = InvalidOffsetNumber;
 		root.isRoot = true;
 
-		ginScanToDelete(gvs, rootBlkno, true, &root, InvalidOffsetNumber);
+		ginScanToDelete(gvs, &root);
 
 		ptr = root.child;
 
-- 
2.39.5 (Apple Git-154)

