Le 08/12/2020 à 18:52, Andrey Borodin a écrit :
Hi Gilles!

Many thanks for your message!

8 дек. 2020 г., в 21:05, Gilles Darold <gil...@darold.net> написал(а):

I know that this report is not really helpful
Quite contrary - this benchmarks prove that controllable reproduction exists. 
I've rebased patches for PG11. Can you please benchmark them (without extending 
SLRU)?

Best regards, Andrey Borodin.

Hi,


Running tests yesterday with the patches has reported log of failures with error on INSERT and UPDATE statements:


   ERROR:  lock MultiXactOffsetControlLock is not held


After a patch review this morning I think I have found what's going wrong. In patch v6-0001-Use-shared-lock-in-GetMultiXactIdMembers-for-offs.patch I think there is a missing reinitialisation of the lockmode variable to LW_NONE inside the retry loop after the call to LWLockRelease() in src/backend/access/transam/multixact.c:1392:GetMultiXactIdMembers(). I've attached a new version of the patch for master that include the fix I'm using now with PG11 and with which everything works very well now.


I'm running more tests to see the impact on the performances to play with multixact_offsets_slru_buffers, multixact_members_slru_buffers and multixact_local_cache_entries. I will reports the results later today.

--
Gilles Darold
http://www.darold.net/

diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 034349aa7b..4c372065de 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -640,10 +640,11 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 	int			lsnindex;
 	char	   *byteptr;
 	XidStatus	status;
+	LWLockMode	lockmode = LW_NONE;
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
 
-	slotno = SimpleLruReadPage_ReadOnly(XactCtl, pageno, xid);
+	slotno = SimpleLruReadPage_ReadOnly(XactCtl, pageno, xid, &lockmode);
 	byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
 
 	status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
@@ -651,6 +652,7 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 	lsnindex = GetLSNIndex(slotno, xid);
 	*lsn = XactCtl->shared->group_lsn[lsnindex];
 
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(XactSLRULock);
 
 	return status;
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 2fe551f17e..2699de033d 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -288,6 +288,7 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 	CommitTimestampEntry entry;
 	TransactionId oldestCommitTsXid;
 	TransactionId newestCommitTsXid;
+	LWLockMode	lockmode = LW_NONE;
 
 	if (!TransactionIdIsValid(xid))
 		ereport(ERROR,
@@ -342,7 +343,7 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 	}
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-	slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid);
+	slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid, &lockmode);
 	memcpy(&entry,
 		   CommitTsCtl->shared->page_buffer[slotno] +
 		   SizeOfCommitTimestampEntry * entryno,
@@ -352,6 +353,7 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 	if (nodeid)
 		*nodeid = entry.nodeid;
 
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(CommitTsSLRULock);
 	return *ts != 0;
 }
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index eb8de7cf32..56bdd04364 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1237,6 +1237,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	MultiXactId tmpMXact;
 	MultiXactOffset nextOffset;
 	MultiXactMember *ptr;
+	LWLockMode	lockmode = LW_NONE;
 
 	debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
 
@@ -1340,12 +1341,13 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	 * time on every multixact creation.
 	 */
 retry:
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
 
 	pageno = MultiXactIdToOffsetPage(multi);
 	entryno = MultiXactIdToOffsetEntry(multi);
 
-	slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
+	/* lock is acquired by SimpleLruReadPage_ReadOnly */
+	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
+										multi, &lockmode);
 	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 	offptr += entryno;
 	offset = *offptr;
@@ -1377,7 +1379,8 @@ retry:
 		entryno = MultiXactIdToOffsetEntry(tmpMXact);
 
 		if (pageno != prev_pageno)
-			slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
+			slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
+												tmpMXact, &lockmode);
 
 		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 		offptr += entryno;
@@ -1387,6 +1390,7 @@ retry:
 		{
 			/* Corner case 2: next multixact is still being filled in */
 			LWLockRelease(MultiXactOffsetSLRULock);
+			lockmode = LW_NONE;
 			CHECK_FOR_INTERRUPTS();
 			pg_usleep(1000L);
 			goto retry;
@@ -1395,14 +1399,14 @@ retry:
 		length = nextMXOffset - offset;
 	}
 
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(MultiXactOffsetSLRULock);
 
 	ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
 	*members = ptr;
 
 	/* Now get the members themselves. */
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
+	lockmode = LW_NONE;
 	truelength = 0;
 	prev_pageno = -1;
 	for (i = 0; i < length; i++, offset++)
@@ -1418,7 +1422,8 @@ retry:
 
 		if (pageno != prev_pageno)
 		{
-			slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
+			slotno = SimpleLruReadPage_ReadOnly(MultiXactMemberCtl, pageno,
+												multi, &lockmode);
 			prev_pageno = pageno;
 		}
 
@@ -1441,6 +1446,7 @@ retry:
 		truelength++;
 	}
 
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(MultiXactMemberSLRULock);
 
 	/*
@@ -2733,6 +2739,7 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	int			entryno;
 	int			slotno;
 	MultiXactOffset *offptr;
+	LWLockMode	lockmode = LW_NONE;
 
 	Assert(MultiXactState->finishedStartup);
 
@@ -2749,10 +2756,12 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 		return false;
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
+	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
+										multi, &lockmode);
 	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 	offptr += entryno;
 	offset = *offptr;
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(MultiXactOffsetSLRULock);
 
 	*result = offset;
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index cec17cb2ae..e271b37d7b 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -487,17 +487,23 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
  * Return value is the shared-buffer slot number now holding the page.
  * The buffer's LRU access info is updated.
  *
- * Control lock must NOT be held at entry, but will be held at exit.
- * It is unspecified whether the lock will be shared or exclusive.
+ * Control lock must be held in *lock_mode mode, which may be LW_NONE.  Control
+ * lock will be held at exit in at least shared mode.  Resulting control lock
+ * mode is set to *lock_mode.
  */
 int
-SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
+SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid,
+						   LWLockMode *lock_mode)
 {
 	SlruShared	shared = ctl->shared;
 	int			slotno;
 
 	/* Try to find the page while holding only shared lock */
-	LWLockAcquire(shared->ControlLock, LW_SHARED);
+	if (*lock_mode == LW_NONE)
+	{
+		LWLockAcquire(shared->ControlLock, LW_SHARED);
+		*lock_mode = LW_SHARED;
+	}
 
 	/* See if page is already in a buffer */
 	for (slotno = 0; slotno < shared->num_slots; slotno++)
@@ -517,8 +523,13 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
 	}
 
 	/* No luck, so switch to normal exclusive lock and do regular read */
-	LWLockRelease(shared->ControlLock);
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
+	if (*lock_mode != LW_EXCLUSIVE)
+	{
+		Assert(*lock_mode == LW_NONE);
+		LWLockRelease(shared->ControlLock);
+		LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
+		*lock_mode = LW_EXCLUSIVE;
+	}
 
 	return SimpleLruReadPage(ctl, pageno, true, xid);
 }
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 0111e867c7..7bb1543189 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -113,6 +113,7 @@ SubTransGetParent(TransactionId xid)
 	int			slotno;
 	TransactionId *ptr;
 	TransactionId parent;
+	LWLockMode	lockmode = LW_NONE;
 
 	/* Can't ask about stuff that might not be around anymore */
 	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
@@ -123,12 +124,13 @@ SubTransGetParent(TransactionId xid)
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
 
-	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
+	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid, &lockmode);
 	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
 	ptr += entryno;
 
 	parent = *ptr;
 
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(SubtransSLRULock);
 
 	return parent;
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index c0763c63e2..ef1dbbe53d 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -2011,6 +2011,7 @@ asyncQueueReadAllNotifications(void)
 			int			curoffset = QUEUE_POS_OFFSET(pos);
 			int			slotno;
 			int			copysize;
+			LWLockMode	lockmode = LW_NONE;
 
 			/*
 			 * We copy the data from SLRU into a local buffer, so as to avoid
@@ -2019,7 +2020,7 @@ asyncQueueReadAllNotifications(void)
 			 * part of the page we will actually inspect.
 			 */
 			slotno = SimpleLruReadPage_ReadOnly(NotifyCtl, curpage,
-												InvalidTransactionId);
+												InvalidTransactionId, &lockmode);
 			if (curpage == QUEUE_POS_PAGE(head))
 			{
 				/* we only want to read as far as head */
@@ -2036,6 +2037,7 @@ asyncQueueReadAllNotifications(void)
 				   NotifyCtl->shared->page_buffer[slotno] + curoffset,
 				   copysize);
 			/* Release lock that we got from SimpleLruReadPage_ReadOnly() */
+			Assert(lockmode != LW_NONE);
 			LWLockRelease(NotifySLRULock);
 
 			/*
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 108e652179..ad1dcc7140 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -1067,6 +1067,8 @@ LWLockWakeup(LWLock *lock)
 static void
 LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 {
+	Assert(mode != LW_NONE);
+
 	/*
 	 * If we don't have a PGPROC structure, there's no way to wait. This
 	 * should never occur, since MyProc should only be null during shared
@@ -1827,6 +1829,7 @@ LWLockRelease(LWLock *lock)
 		elog(ERROR, "lock %s is not held", T_NAME(lock));
 
 	mode = held_lwlocks[i].mode;
+	Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
 
 	num_held_lwlocks--;
 	for (; i < num_held_lwlocks; i++)
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 8a365b400c..a4df90a8ae 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -924,6 +924,7 @@ SerialGetMinConflictCommitSeqNo(TransactionId xid)
 	TransactionId tailXid;
 	SerCommitSeqNo val;
 	int			slotno;
+	LWLockMode	lockmode = LW_NONE;
 
 	Assert(TransactionIdIsValid(xid));
 
@@ -946,8 +947,9 @@ SerialGetMinConflictCommitSeqNo(TransactionId xid)
 	 * but will return with that lock held, which must then be released.
 	 */
 	slotno = SimpleLruReadPage_ReadOnly(SerialSlruCtl,
-										SerialPage(xid), xid);
+										SerialPage(xid), xid, &lockmode);
 	val = SerialValue(slotno, xid);
+	Assert(lockmode != LW_NONE);
 	LWLockRelease(SerialSLRULock);
 	return val;
 }
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index b39b43504d..4b66d3b592 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -142,7 +142,7 @@ extern int	SimpleLruZeroPage(SlruCtl ctl, int pageno);
 extern int	SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
 							  TransactionId xid);
 extern int	SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
-									   TransactionId xid);
+									   TransactionId xid, LWLockMode *lock_mode);
 extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
 extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied);
 extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index af9b41795d..e680c6397a 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -129,6 +129,8 @@ extern PGDLLIMPORT int NamedLWLockTrancheRequests;
 
 typedef enum LWLockMode
 {
+	LW_NONE,					/* Not a lock mode. Indicates that there is no
+								 * lock. */
 	LW_EXCLUSIVE,
 	LW_SHARED,
 	LW_WAIT_UNTIL_FREE			/* A special mode used in PGPROC->lwWaitMode,

Reply via email to