From dee5bf87314606e020a574b85cf1e9cbe066588d Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 27 Oct 2020 19:29:56 +0300
Subject: [PATCH v1106 1/4] Use shared lock in GetMultiXactIdMembers for
 offsets and members

Previously the read of multixact required exclusive control locks for both
offsets and members SLRUs.  This could lead to the excessive lock contention.
This commit we makes multixacts SLRU take advantage of SimpleLruReadPage_ReadOnly
similar to clog, commit_ts and subtrans.

In order to evade extra reacquiring of CLRU lock, we teach
SimpleLruReadPage_ReadOnly() to take into account the current lock mode and
report resulting lock mode back.

Discussion: https://postgr.es/m/a7f1c4e1-1015-92a4-2bd4-6736bd13d03e%40postgrespro.ru#c496c4e75fc0605094a0e1f763e6a6ec
Author: Andrey Borodin
Reviewed-by: Kyotaro Horiguchi, Daniel Gustafsson
Reviewed-by: Anastasia Lubennikova, Alexander Korotkov
---
 src/backend/access/transam/clog.c      |  3 ++-
 src/backend/access/transam/commit_ts.c |  3 ++-
 src/backend/access/transam/multixact.c | 21 ++++++++++++++-------
 src/backend/access/transam/slru.c      | 23 +++++++++++++++++------
 src/backend/access/transam/subtrans.c  |  3 ++-
 src/backend/commands/async.c           |  6 ++++--
 src/backend/storage/lmgr/lwlock.c      |  3 +++
 src/backend/storage/lmgr/predicate.c   |  4 +++-
 src/include/access/slru.h              |  8 ++++----
 src/include/storage/lwlock.h           |  2 ++
 10 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index a8d1080f17..bdfbd10f96 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -640,10 +640,11 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 	int			lsnindex;
 	char	   *byteptr;
 	XidStatus	status;
+	LWLockMode	lockmode = LW_NONE;
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
 
-	slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid);
+	slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid, &lockmode);
 	byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
 
 	status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 6c4911d9bc..d7b6f583e1 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -288,6 +288,7 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 	CommitTimestampEntry entry;
 	TransactionId oldestCommitTsXid;
 	TransactionId newestCommitTsXid;
+	LWLockMode	lockmode = LW_NONE;
 
 	if (!TransactionIdIsValid(xid))
 		ereport(ERROR,
@@ -342,7 +343,7 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 	}
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-	slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid);
+	slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid, &lockmode);
 	memcpy(&entry,
 		   CommitTsCtl->shared->page_buffer[slotno] +
 		   SizeOfCommitTimestampEntry * entryno,
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 365daf153a..5ca10a139a 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1216,6 +1216,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	MultiXactId tmpMXact;
 	MultiXactOffset nextOffset;
 	MultiXactMember *ptr;
+	LWLockMode	lockmode = LW_NONE;
 
 	debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
 
@@ -1319,12 +1320,13 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	 * time on every multixact creation.
 	 */
 retry:
-	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
 
 	pageno = MultiXactIdToOffsetPage(multi);
 	entryno = MultiXactIdToOffsetEntry(multi);
 
-	slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
+	/* lock is acquired by SimpleLruReadPage_ReadOnly */
+	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
+										multi, &lockmode);
 	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 	offptr += entryno;
 	offset = *offptr;
@@ -1356,7 +1358,8 @@ retry:
 		entryno = MultiXactIdToOffsetEntry(tmpMXact);
 
 		if (pageno != prev_pageno)
-			slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
+			slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
+												tmpMXact, &lockmode);
 
 		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 		offptr += entryno;
@@ -1380,8 +1383,7 @@ retry:
 	*members = ptr;
 
 	/* Now get the members themselves. */
-	LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
-
+	lockmode = LW_NONE;
 	truelength = 0;
 	prev_pageno = -1;
 	for (i = 0; i < length; i++, offset++)
@@ -1397,7 +1399,8 @@ retry:
 
 		if (pageno != prev_pageno)
 		{
-			slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
+			slotno = SimpleLruReadPage_ReadOnly(MultiXactMemberCtl, pageno,
+												multi, &lockmode);
 			prev_pageno = pageno;
 		}
 
@@ -1420,6 +1423,7 @@ retry:
 		truelength++;
 	}
 
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(MultiXactMemberControlLock);
 
 	/*
@@ -2723,6 +2727,7 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	int			entryno;
 	int			slotno;
 	MultiXactOffset *offptr;
+	LWLockMode	lockmode = LW_NONE;
 
 	Assert(MultiXactState->finishedStartup);
 
@@ -2743,10 +2748,12 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 		return false;
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
+	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
+										multi, &lockmode);
 	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 	offptr += entryno;
 	offset = *offptr;
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(MultiXactOffsetControlLock);
 
 	*result = offset;
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index cfe9513827..f86950eb51 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -460,17 +460,23 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
  * Return value is the shared-buffer slot number now holding the page.
  * The buffer's LRU access info is updated.
  *
- * Control lock must NOT be held at entry, but will be held at exit.
- * It is unspecified whether the lock will be shared or exclusive.
+ * Control lock must be held in *lock_mode mode, which may be LW_NONE.  Control
+ * lock will be held at exit in at least shared mode.  Resulting control lock
+ * mode is set to *lock_mode.
  */
 int
-SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
+SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid,
+						   LWLockMode *lock_mode)
 {
 	SlruShared	shared = ctl->shared;
 	int			slotno;
 
 	/* Try to find the page while holding only shared lock */
-	LWLockAcquire(shared->ControlLock, LW_SHARED);
+	if (*lock_mode == LW_NONE)
+	{
+		LWLockAcquire(shared->ControlLock, LW_SHARED);
+		*lock_mode = LW_SHARED;
+	}
 
 	/* See if page is already in a buffer */
 	for (slotno = 0; slotno < shared->num_slots; slotno++)
@@ -486,8 +492,13 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
 	}
 
 	/* No luck, so switch to normal exclusive lock and do regular read */
-	LWLockRelease(shared->ControlLock);
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
+	if (*lock_mode != LW_EXCLUSIVE)
+	{
+		Assert(*lock_mode == LW_NONE);
+		LWLockRelease(shared->ControlLock);
+		LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
+		*lock_mode = LW_EXCLUSIVE;
+	}
 
 	return SimpleLruReadPage(ctl, pageno, true, xid);
 }
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index ef63b6d98a..440a21415a 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -113,6 +113,7 @@ SubTransGetParent(TransactionId xid)
 	int			slotno;
 	TransactionId *ptr;
 	TransactionId parent;
+	LWLockMode	lockmode = LW_NONE;
 
 	/* Can't ask about stuff that might not be around anymore */
 	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
@@ -123,7 +124,7 @@ SubTransGetParent(TransactionId xid)
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
 
-	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
+	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid, &lockmode);
 	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
 	ptr += entryno;
 
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 7f4e11bd90..4ebc069a5d 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -1834,6 +1834,7 @@ asyncQueueReadAllNotifications(void)
 			int			curoffset = QUEUE_POS_OFFSET(pos);
 			int			slotno;
 			int			copysize;
+			LWLockMode	lockmode = LW_NONE;
 
 			/*
 			 * We copy the data from SLRU into a local buffer, so as to avoid
@@ -1842,7 +1843,7 @@ asyncQueueReadAllNotifications(void)
 			 * of the page we will actually inspect.
 			 */
 			slotno = SimpleLruReadPage_ReadOnly(AsyncCtl, curpage,
-												InvalidTransactionId);
+												InvalidTransactionId, &lockmode);
 			if (curpage == QUEUE_POS_PAGE(head))
 			{
 				/* we only want to read as far as head */
@@ -1859,7 +1860,8 @@ asyncQueueReadAllNotifications(void)
 				   AsyncCtl->shared->page_buffer[slotno] + curoffset,
 				   copysize);
 			/* Release lock that we got from SimpleLruReadPage_ReadOnly() */
-			LWLockRelease(AsyncCtlLock);
+			Assert(lockmode != LW_NONE);
+			LWLockRelease(AsyncCtl);
 
 			/*
 			 * Process messages up to the stop position, end of page, or an
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 5aae233995..066dbb8ccf 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -982,6 +982,8 @@ LWLockWakeup(LWLock *lock)
 static void
 LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 {
+	Assert(mode != LW_NONE);
+
 	/*
 	 * If we don't have a PGPROC structure, there's no way to wait. This
 	 * should never occur, since MyProc should only be null during shared
@@ -1742,6 +1744,7 @@ LWLockRelease(LWLock *lock)
 		elog(ERROR, "lock %s is not held", T_NAME(lock));
 
 	mode = held_lwlocks[i].mode;
+	Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
 
 	num_held_lwlocks--;
 	for (; i < num_held_lwlocks; i++)
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 002d040ba6..9513fd915b 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -908,6 +908,7 @@ OldSerXidGetMinConflictCommitSeqNo(TransactionId xid)
 	TransactionId tailXid;
 	SerCommitSeqNo val;
 	int			slotno;
+	LWLockMode	lockmode = LW_NONE;
 
 	Assert(TransactionIdIsValid(xid));
 
@@ -930,8 +931,9 @@ OldSerXidGetMinConflictCommitSeqNo(TransactionId xid)
 	 * but will return with that lock held, which must then be released.
 	 */
 	slotno = SimpleLruReadPage_ReadOnly(OldSerXidSlruCtl,
-										OldSerXidPage(xid), xid);
+										OldSerXidPage(xid), xid, &lockmode);
 	val = OldSerXidValue(slotno, xid);
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(OldSerXidLock);
 	return val;
 }
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 0e89e48c97..51a3df62e0 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -141,10 +141,10 @@ extern Size SimpleLruShmemSize(int nslots, int nlsns);
 extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 			  LWLock *ctllock, const char *subdir, int tranche_id);
 extern int	SimpleLruZeroPage(SlruCtl ctl, int pageno);
-extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
-				  TransactionId xid);
-extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
-						   TransactionId xid);
+extern int	SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
+							  TransactionId xid);
+extern int	SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
+									   TransactionId xid, LWLockMode *lock_mode);
 extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
 extern void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied);
 extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index c21bfe2f66..3263c0fe62 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -131,6 +131,8 @@ extern PGDLLIMPORT int NamedLWLockTrancheRequests;
 
 typedef enum LWLockMode
 {
+	LW_NONE,					/* Not a lock mode. Indicates that there is no
+								 * lock. */
 	LW_EXCLUSIVE,
 	LW_SHARED,
 	LW_WAIT_UNTIL_FREE			/* A special mode used in PGPROC->lwlockMode,
-- 
2.24.3 (Apple Git-128)

