> It'd really like to see it being replaced by a queuing lock
> (i.e. lwlock) before we go there. And then maybe partition the
> freelist, and make nentries an atomic.

I believe I just implemented something like this (see attachment). The
idea is to partition PROCLOCK hash table manually into NUM_LOCK_
PARTITIONS smaller and non-partitioned hash tables. Since these tables
are non-partitioned spinlock is not used and there is no lock
contention.

On 60-core server we gain 3.5-4 more TPS according to benchmark
described above. As I understand there is no performance degradation in
other cases (different CPU, traditional pgbench, etc).

If this patch seems to be OK I believe we could consider applying the
same change not only to PROCLOCK hash table.
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 76fc615..1a86f86 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -249,15 +249,56 @@ static volatile FastPathStrongRelationLockData *FastPathStrongRelationLocks;
  * shared memory; LockMethodLocalHash is local to each backend.
  */
 static HTAB *LockMethodLockHash;
-static HTAB *LockMethodProcLockHash;
+static HTAB *LockMethodProcLockHashPartitions[NUM_LOCK_PARTITIONS];
 static HTAB *LockMethodLocalHash;
 
+/*
+ * LockMethodProcLockHash hash table is partitioned into NUM_LOCK_PARTITIONS
+ * usual (non-partitioned, i.e. without HASH_PARTITION flag) hash tables. The
+ * reason for partitioning LockMethodProcLockHash manually instead of just
+ * using single hash table with HASH_PARTITION flag is following. If hash
+ * table has HASH_PARTITION flag it uses a single spinlock to safely
+ * access some of its fields. Turned out in case of this particular hash
+ * table it causes a considerable performance degradation becouse of lock
+ * contention on servers with tens of cores.
+ */
+#define LockMethodProcLockHash(hashcode) \
+	(LockMethodProcLockHashPartitions[LockHashPartition(hashcode)])
+
+/*
+ * Each partition of LockMethodProcLockHash hash table has an unique name
+ * generated from this template using snprintf.
+ */
+static const char ProcLockHashNameTemplate[] = "PROCLOCK hash, partition %d";
+
+/*
+ * Size of buffer required to store LockMethodProcLockHash partition name.
+ *
+ * 10 is number of digits in 2^32. 2 is length of "%d" string.
+ */
+#define PROC_LOCK_HASH_NAME_BUFF_SIZE (sizeof(ProcLockHashNameTemplate)+10-2)
 
 /* private state for error cleanup */
 static LOCALLOCK *StrongLockInProgress;
 static LOCALLOCK *awaitedLock;
 static ResourceOwner awaitedOwner;
 
+/*
+ * Calculate total number of entries in LockMethodProcLockHash hash table.
+ *
+ * Caller is responsible for having acquired appropriate LWLocks.
+ */
+static long
+GetProcLockHashNumEntries()
+{
+	int			i;
+	long		sum = 0;
+
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+		sum += hash_get_num_entries(LockMethodProcLockHashPartitions[i]);
+
+	return sum;
+}
 
 #ifdef LOCK_DEBUG
 
@@ -373,16 +414,16 @@ void
 InitLocks(void)
 {
 	HASHCTL		info;
-	long		init_table_size,
-				max_table_size;
+	long		shmem_table_size;
 	bool		found;
+	int			i;
+	char		proclock_hash_name[PROC_LOCK_HASH_NAME_BUFF_SIZE];
 
 	/*
 	 * Compute init/max size to request for lock hashtables.  Note these
 	 * calculations must agree with LockShmemSize!
 	 */
-	max_table_size = NLOCKENTS();
-	init_table_size = max_table_size / 2;
+	shmem_table_size = NLOCKENTS();
 
 	/*
 	 * Allocate hash table for LOCK structs.  This stores per-locked-object
@@ -394,14 +435,13 @@ InitLocks(void)
 	info.num_partitions = NUM_LOCK_PARTITIONS;
 
 	LockMethodLockHash = ShmemInitHash("LOCK hash",
-									   init_table_size,
-									   max_table_size,
+									   shmem_table_size,
+									   shmem_table_size,
 									   &info,
 									HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
 
 	/* Assume an average of 2 holders per lock */
-	max_table_size *= 2;
-	init_table_size *= 2;
+	shmem_table_size = (shmem_table_size * 2) / NUM_LOCK_PARTITIONS;
 
 	/*
 	 * Allocate hash table for PROCLOCK structs.  This stores
@@ -412,11 +452,17 @@ InitLocks(void)
 	info.hash = proclock_hash;
 	info.num_partitions = NUM_LOCK_PARTITIONS;
 
-	LockMethodProcLockHash = ShmemInitHash("PROCLOCK hash",
-										   init_table_size,
-										   max_table_size,
-										   &info,
-								 HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+	{
+		snprintf(proclock_hash_name, sizeof(proclock_hash_name),
+				 ProcLockHashNameTemplate, i + 1);
+
+		LockMethodProcLockHashPartitions[i] = ShmemInitHash(proclock_hash_name,
+															shmem_table_size,
+															shmem_table_size,
+															&info,
+												  HASH_ELEM | HASH_FUNCTION);
+	}
 
 	/*
 	 * Allocate fast-path structures.
@@ -943,7 +989,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
 				proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode);
 				SHMQueueDelete(&proclock->lockLink);
 				SHMQueueDelete(&proclock->procLink);
-				if (!hash_search_with_hash_value(LockMethodProcLockHash,
+				if (!hash_search_with_hash_value(LockMethodProcLockHash(hashcode),
 												 (void *) &(proclock->tag),
 												 proclock_hashcode,
 												 HASH_REMOVE,
@@ -1102,7 +1148,7 @@ SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
 	/*
 	 * Find or create a proclock entry with this tag
 	 */
-	proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash,
+	proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash(hashcode),
 														(void *) &proclocktag,
 														proclock_hashcode,
 														HASH_ENTER_NULL,
@@ -1424,7 +1470,7 @@ CleanUpLock(LOCK *lock, PROCLOCK *proclock,
 		SHMQueueDelete(&proclock->lockLink);
 		SHMQueueDelete(&proclock->procLink);
 		proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode);
-		if (!hash_search_with_hash_value(LockMethodProcLockHash,
+		if (!hash_search_with_hash_value(LockMethodProcLockHash(hashcode),
 										 (void *) &(proclock->tag),
 										 proclock_hashcode,
 										 HASH_REMOVE,
@@ -1881,7 +1927,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
 
 		proclocktag.myLock = lock;
 		proclocktag.myProc = MyProc;
-		locallock->proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
+		locallock->proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash(locallock->hashcode),
 													   (void *) &proclocktag,
 													   HASH_FIND,
 													   NULL);
@@ -2631,7 +2677,7 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
 
 		proclock_hashcode = ProcLockHashCode(&proclocktag, locallock->hashcode);
 		proclock = (PROCLOCK *)
-			hash_search_with_hash_value(LockMethodProcLockHash,
+			hash_search_with_hash_value(LockMethodProcLockHash(locallock->hashcode),
 										(void *) &proclocktag,
 										proclock_hashcode,
 										HASH_FIND,
@@ -2914,7 +2960,8 @@ LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc,
 
 	proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode);
 
-	proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash,
+	proclock = (PROCLOCK *) hash_search_with_hash_value(
+											LockMethodProcLockHash(hashcode),
 														(void *) &proclocktag,
 														proclock_hashcode,
 														HASH_FIND,
@@ -3243,7 +3290,7 @@ PostPrepare_Locks(TransactionId xid)
 			 * the same hash key, since there can be only one entry for any
 			 * given lock with my own proc.
 			 */
-			if (!hash_update_hash_key(LockMethodProcLockHash,
+			if (!hash_update_hash_key(LockMethodProcLockHashPartitions[partition],
 									  (void *) proclock,
 									  (void *) &proclocktag))
 				elog(PANIC, "duplicate entry found while reassigning a prepared transaction's locks");
@@ -3410,7 +3457,7 @@ GetLockStatusData(void)
 		LWLockAcquire(LockHashPartitionLockByIndex(i), LW_SHARED);
 
 	/* Now we can safely count the number of proclocks */
-	data->nelements = el + hash_get_num_entries(LockMethodProcLockHash);
+	data->nelements = el + GetProcLockHashNumEntries();
 	if (data->nelements > els)
 	{
 		els = data->nelements;
@@ -3418,27 +3465,30 @@ GetLockStatusData(void)
 			repalloc(data->locks, sizeof(LockInstanceData) * els);
 	}
 
-	/* Now scan the tables to copy the data */
-	hash_seq_init(&seqstat, LockMethodProcLockHash);
-
-	while ((proclock = (PROCLOCK *) hash_seq_search(&seqstat)))
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
 	{
-		PGPROC	   *proc = proclock->tag.myProc;
-		LOCK	   *lock = proclock->tag.myLock;
-		LockInstanceData *instance = &data->locks[el];
+		/* Now scan the tables to copy the data */
+		hash_seq_init(&seqstat, LockMethodProcLockHashPartitions[i]);
 
-		memcpy(&instance->locktag, &lock->tag, sizeof(LOCKTAG));
-		instance->holdMask = proclock->holdMask;
-		if (proc->waitLock == proclock->tag.myLock)
-			instance->waitLockMode = proc->waitLockMode;
-		else
-			instance->waitLockMode = NoLock;
-		instance->backend = proc->backendId;
-		instance->lxid = proc->lxid;
-		instance->pid = proc->pid;
-		instance->fastpath = false;
+		while ((proclock = (PROCLOCK *) hash_seq_search(&seqstat)))
+		{
+			PGPROC	   *proc = proclock->tag.myProc;
+			LOCK	   *lock = proclock->tag.myLock;
+			LockInstanceData *instance = &data->locks[el];
 
-		el++;
+			memcpy(&instance->locktag, &lock->tag, sizeof(LOCKTAG));
+			instance->holdMask = proclock->holdMask;
+			if (proc->waitLock == proclock->tag.myLock)
+				instance->waitLockMode = proc->waitLockMode;
+			else
+				instance->waitLockMode = NoLock;
+			instance->backend = proc->backendId;
+			instance->lxid = proc->lxid;
+			instance->pid = proc->pid;
+			instance->fastpath = false;
+
+			el++;
+		}
 	}
 
 	/*
@@ -3487,7 +3537,7 @@ GetRunningTransactionLocks(int *nlocks)
 		LWLockAcquire(LockHashPartitionLockByIndex(i), LW_SHARED);
 
 	/* Now we can safely count the number of proclocks */
-	els = hash_get_num_entries(LockMethodProcLockHash);
+	els = GetProcLockHashNumEntries();
 
 	/*
 	 * Allocating enough space for all locks in the lock table is overkill,
@@ -3495,42 +3545,46 @@ GetRunningTransactionLocks(int *nlocks)
 	 */
 	accessExclusiveLocks = palloc(els * sizeof(xl_standby_lock));
 
-	/* Now scan the tables to copy the data */
-	hash_seq_init(&seqstat, LockMethodProcLockHash);
-
-	/*
-	 * If lock is a currently granted AccessExclusiveLock then it will have
-	 * just one proclock holder, so locks are never accessed twice in this
-	 * particular case. Don't copy this code for use elsewhere because in the
-	 * general case this will give you duplicate locks when looking at
-	 * non-exclusive lock types.
-	 */
-	index = 0;
-	while ((proclock = (PROCLOCK *) hash_seq_search(&seqstat)))
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
 	{
-		/* make sure this definition matches the one used in LockAcquire */
-		if ((proclock->holdMask & LOCKBIT_ON(AccessExclusiveLock)) &&
-			proclock->tag.myLock->tag.locktag_type == LOCKTAG_RELATION)
+		/* Now scan the tables to copy the data */
+		hash_seq_init(&seqstat, LockMethodProcLockHashPartitions[i]);
+
+		/*
+		 * If lock is a currently granted AccessExclusiveLock then it will
+		 * have just one proclock holder, so locks are never accessed twice in
+		 * this particular case. Don't copy this code for use elsewhere
+		 * because in the general case this will give you duplicate locks when
+		 * looking at non-exclusive lock types.
+		 */
+		index = 0;
+		while ((proclock = (PROCLOCK *) hash_seq_search(&seqstat)))
 		{
-			PGPROC	   *proc = proclock->tag.myProc;
-			PGXACT	   *pgxact = &ProcGlobal->allPgXact[proc->pgprocno];
-			LOCK	   *lock = proclock->tag.myLock;
-			TransactionId xid = pgxact->xid;
+			/* make sure this definition matches the one used in LockAcquire */
+			if ((proclock->holdMask & LOCKBIT_ON(AccessExclusiveLock)) &&
+				proclock->tag.myLock->tag.locktag_type == LOCKTAG_RELATION)
+			{
+				PGPROC	   *proc = proclock->tag.myProc;
+				PGXACT	   *pgxact = &ProcGlobal->allPgXact[proc->pgprocno];
+				LOCK	   *lock = proclock->tag.myLock;
+				TransactionId xid = pgxact->xid;
 
-			/*
-			 * Don't record locks for transactions if we know they have
-			 * already issued their WAL record for commit but not yet released
-			 * lock. It is still possible that we see locks held by already
-			 * complete transactions, if they haven't yet zeroed their xids.
-			 */
-			if (!TransactionIdIsValid(xid))
-				continue;
+				/*
+				 * Don't record locks for transactions if we know they have
+				 * already issued their WAL record for commit but not yet
+				 * released lock. It is still possible that we see locks held
+				 * by already complete transactions, if they haven't yet
+				 * zeroed their xids.
+				 */
+				if (!TransactionIdIsValid(xid))
+					continue;
 
-			accessExclusiveLocks[index].xid = xid;
-			accessExclusiveLocks[index].dbOid = lock->tag.locktag_field1;
-			accessExclusiveLocks[index].relOid = lock->tag.locktag_field2;
+				accessExclusiveLocks[index].xid = xid;
+				accessExclusiveLocks[index].dbOid = lock->tag.locktag_field1;
+				accessExclusiveLocks[index].relOid = lock->tag.locktag_field2;
 
-			index++;
+				index++;
+			}
 		}
 	}
 
@@ -3614,23 +3668,27 @@ DumpAllLocks(void)
 	PROCLOCK   *proclock;
 	LOCK	   *lock;
 	HASH_SEQ_STATUS status;
+	int			i;
 
 	proc = MyProc;
 
 	if (proc && proc->waitLock)
 		LOCK_PRINT("DumpAllLocks: waiting on", proc->waitLock, 0);
 
-	hash_seq_init(&status, LockMethodProcLockHash);
-
-	while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
 	{
-		PROCLOCK_PRINT("DumpAllLocks", proclock);
+		hash_seq_init(&status, LockMethodProcLockHashPartitions[i]);
 
-		lock = proclock->tag.myLock;
-		if (lock)
-			LOCK_PRINT("DumpAllLocks", lock, 0);
-		else
-			elog(LOG, "DumpAllLocks: proclock->tag.myLock = NULL");
+		while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
+		{
+			PROCLOCK_PRINT("DumpAllLocks", proclock);
+
+			lock = proclock->tag.myLock;
+			if (lock)
+				LOCK_PRINT("DumpAllLocks", lock, 0);
+			else
+				elog(LOG, "DumpAllLocks: proclock->tag.myLock = NULL");
+		}
 	}
 }
 #endif   /* LOCK_DEBUG */
@@ -3749,7 +3807,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
 	/*
 	 * Find or create a proclock entry with this tag
 	 */
-	proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash,
+	proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash(hashcode),
 														(void *) &proclocktag,
 														proclock_hashcode,
 														HASH_ENTER_NULL,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to