From eb280cbd53a869675b9a2834368eef4e70b4e891 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Mon, 14 Apr 2025 13:34:54 +1200
Subject: [PATCH v1] Eliminate integer divide in fastpath locking code

FAST_PATH_REL_GROUP() was using integer division to determine the
fastpath locking group slot to use for the relation.  While this doesn't
seem to be very hot code, divide is a very slow operation to put in the
fastpath locking code.  In any case, the global variable we're dividing
by is always a power-of-two, so adjust the macro to use a bitwise-AND
rather than a divide.

In passing adjust the code that's setting FastPathLockGroupsPerBackend
so that it's more clear that the value being set is a power-of-two.
Also adjust some comments in the area which contained some magic
numbers.  This seemed better placed in the location where the #defines
were located rather than were they were being used.
---
 src/backend/storage/lmgr/lock.c   |  5 ++++-
 src/backend/utils/init/postinit.c | 37 +++++++++++++++----------------
 src/include/storage/proc.h        |  8 +++++++
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 002303664aa..86b06b9223f 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -210,9 +210,12 @@ int			FastPathLockGroupsPerBackend = 0;
  *
  * The selected constant (49157) is a prime not too close to 2^k, and it's
  * small enough to not cause overflows (in 64-bit).
+ *
+ * We can assume that FastPathLockGroupsPerBackend is a power-of-two per
+ * InitializeFastPathLocks().
  */
 #define FAST_PATH_REL_GROUP(rel) \
-	(((uint64) (rel) * 49157) % FastPathLockGroupsPerBackend)
+	(((uint64) (rel) * 49157) & (FastPathLockGroupsPerBackend - 1))
 
 /*
  * Given the group/slot indexes, calculate the slot index in the whole array
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 01309ef3f86..3438008e06b 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -575,13 +575,6 @@ InitializeMaxBackends(void)
  *
  * This must be called after modules have had the chance to alter GUCs in
  * shared_preload_libraries and before shared memory size is determined.
- *
- * The default max_locks_per_xact=64 means 4 groups by default.
- *
- * We allow anything between 1 and 1024 groups, with the usual power-of-2
- * logic. The 1 is the "old" size with only 16 slots, 1024 is an arbitrary
- * limit (matching max_locks_per_xact = 16k). Values over 1024 are unlikely
- * to be beneficial - there are bottlenecks we'll hit way before that.
  */
 void
 InitializeFastPathLocks(void)
@@ -589,19 +582,25 @@ InitializeFastPathLocks(void)
 	/* Should be initialized only once. */
 	Assert(FastPathLockGroupsPerBackend == 0);
 
-	/* we need at least one group */
-	FastPathLockGroupsPerBackend = 1;
-
-	while (FastPathLockGroupsPerBackend < FP_LOCK_GROUPS_PER_BACKEND_MAX)
-	{
-		/* stop once we exceed max_locks_per_xact */
-		if (FastPathLockSlotsPerBackend() >= max_locks_per_xact)
-			break;
-
-		FastPathLockGroupsPerBackend *= 2;
-	}
+	/*
+	 * Figure out the value for FastPathLockGroupsPerBackend.  We want a
+	 * power-of-two value based off of max_locks_per_xact divided by
+	 * FP_LOCK_SLOTS_PER_GROUP.
+	 *
+	 * FP_LOCK_SLOTS_PER_GROUP is always a power-of-two value and a power of
+	 * two divided by a power of two is still a power of two.  Ensure we never
+	 * go below 1.  Technically the minimum value for max_locks_per_xact is 10
+	 * and the next power of two for that is 16, so we shouldn't ever go below
+	 * 1, but for paranoia's sake, we explicitly clamp the value at 1 because
+	 * 0 isn't a valid value for FastPathLockGroupsPerBackend.
+	 */
+	FastPathLockGroupsPerBackend =
+		Max(Min(pg_nextpower2_32(max_locks_per_xact) / FP_LOCK_SLOTS_PER_GROUP,
+				FP_LOCK_GROUPS_PER_BACKEND_MAX), 1);
 
-	Assert(FastPathLockGroupsPerBackend <= FP_LOCK_GROUPS_PER_BACKEND_MAX);
+	/* Validate we did get a power-of-two */
+	Assert(FastPathLockGroupsPerBackend ==
+		   pg_nextpower2_32(FastPathLockGroupsPerBackend));
 }
 
 /*
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index f51b03d3822..30b3ebd284f 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -86,6 +86,14 @@ struct XidCache
  */
 extern PGDLLIMPORT int FastPathLockGroupsPerBackend;
 
+/*
+ * Define the maximum number of fast-path locking groups per backend.
+ * This must be a power-of-two value.  The actual number of fast-path
+ * locks calculated in InitializeFastPathLocks() based on max_locks_per_xact.
+ * 1024 is an arbitrary limit (matching max_locks_per_xact = 16k).  Values
+ * over 1024 are unlikely to be beneficial - there are bottlenecks we'll hit
+ * way before that.
+ */
 #define		FP_LOCK_GROUPS_PER_BACKEND_MAX	1024
 #define		FP_LOCK_SLOTS_PER_GROUP		16	/* don't change */
 #define		FastPathLockSlotsPerBackend() \
-- 
2.43.0

