From e83afb020a6b22416d6c55104d00425883fecd97 Mon Sep 17 00:00:00 2001
From: Amit Kapila <akapila@postgresql.org>
Date: Thu, 6 Oct 2022 11:45:22 +0530
Subject: [PATCH v2 2/2] Don't acquire cleanup lock on the new bucket page.

During the hash index's split operation, we acquire a cleanup lock on the
old bucket page to protect the split from concurrent inserts. However,
unlike the old bucket, we don't need to check for a cleanup lock on the new
bucket, as no other backend could find this bucket unless the meta page is
updated.
---
 src/backend/access/hash/README      | 16 ++++++++--------
 src/backend/access/hash/hash_xlog.c |  9 ++++-----
 src/backend/access/hash/hashpage.c  | 16 ++++------------
 3 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/src/backend/access/hash/README b/src/backend/access/hash/README
index 13dc59c..e0c19d9 100644
--- a/src/backend/access/hash/README
+++ b/src/backend/access/hash/README
@@ -150,14 +150,14 @@ on that buffer.  For hash indexes, a cleanup lock on a primary bucket page
 represents the right to perform an arbitrary reorganization of the entire
 bucket.  Therefore, scans retain a pin on the primary bucket page for the
 bucket they are currently scanning.  Splitting a bucket requires a cleanup
-lock on both the old and new primary bucket pages.  VACUUM therefore takes
-a cleanup lock on every bucket page in order to remove tuples.  It can also
-remove tuples copied to a new bucket by any previous split operation, because
-the cleanup lock taken on the primary bucket page guarantees that no scans
-which started prior to the most recent split can still be in progress.  After
-cleaning each page individually, it attempts to take a cleanup lock on the
-primary bucket page in order to "squeeze" the bucket down to the minimum
-possible number of pages.
+lock on the old primary bucket page.  VACUUM therefore takes a cleanup lock
+on every bucket page in order to remove tuples.  It can also remove tuples
+copied to a new bucket by any previous split operation, because the cleanup
+lock taken on the primary bucket page guarantees that no scans which started
+prior to the most recent split can still be in progress.  After cleaning each
+page individually, it attempts to take a cleanup lock on the primary bucket
+page in order to "squeeze" the bucket down to the minimum possible number of
+pages.
 
 To avoid deadlocks, we must be consistent about the lock order in which we
 lock the buckets for operations that requires locks on two different buckets.
diff --git a/src/backend/access/hash/hash_xlog.c b/src/backend/access/hash/hash_xlog.c
index a24a1c3..0d983e3 100644
--- a/src/backend/access/hash/hash_xlog.c
+++ b/src/backend/access/hash/hash_xlog.c
@@ -323,9 +323,9 @@ hash_xlog_split_allocate_page(XLogReaderState *record)
 	XLogRedoAction action;
 
 	/*
-	 * To be consistent with normal operation, here we take cleanup locks on
-	 * both the old and new buckets even though there can't be any concurrent
-	 * inserts.
+	 * To be consistent with normal operation, here we take a cleanup lock on
+	 * the old bucket and an exclusive lock on the new bucket even though
+	 * there can't be any concurrent inserts.
 	 */
 
 	/* replay the record for old bucket */
@@ -351,8 +351,7 @@ hash_xlog_split_allocate_page(XLogReaderState *record)
 	}
 
 	/* replay the record for new bucket */
-	XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_AND_CLEANUP_LOCK, true,
-								  &newbuf);
+	newbuf = XLogInitBufferForRedo(record, 1);
 	_hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket,
 				  xlrec->new_bucket_flag, true);
 	MarkBufferDirty(newbuf);
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 05fcf76..087a2ca 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -601,8 +601,7 @@ _hash_pageinit(Page page, Size size)
 /*
  * Attempt to expand the hash table by creating one new bucket.
  *
- * This will silently do nothing if we don't get cleanup lock on old or
- * new bucket.
+ * This will silently do nothing if we don't get cleanup lock on old bucket.
  *
  * Complete the pending splits and remove the tuples from old bucket,
  * if there are any left over from the previous split.
@@ -805,18 +804,11 @@ restart_expand:
 	/*
 	 * Physically allocate the new bucket's primary page.  We want to do this
 	 * before changing the metapage's mapping info, in case we can't get the
-	 * disk space.  Ideally, we don't need to check for cleanup lock on new
-	 * bucket as no other backend could find this bucket unless meta page is
-	 * updated and we initialize the page just before it.  However, it is just
-	 * to be consistent with old bucket locking.
+	 * disk space.  Unlike the old bucket, we don't need to acquire a cleanup
+	 * lock on the new bucket, as no other backend could find this bucket
+	 * unless the meta page is updated.
 	 */
 	buf_nblkno = _hash_getnewbuf(rel, start_nblkno, MAIN_FORKNUM);
-	if (!IsBufferCleanupOK(buf_nblkno))
-	{
-		_hash_relbuf(rel, buf_oblkno);
-		_hash_relbuf(rel, buf_nblkno);
-		goto fail;
-	}
 
 	/*
 	 * Since we are scribbling on the pages in the shared buffers, establish a
-- 
1.8.3.1

