Andrey Borodin wrote 2022-07-23 11:39:

Yura, thank you for your benchmarks!
We already knew that patch can save the day on pathological workloads,
now we have a proof of this.
Also there's the evidence that user can blindly increase size of SLRU
if they want (with the second patch). So there's no need for hard
explanations on how to tune the buffers size.

Hi @Andrey.Borodin, With some considerations and performance checks from @Yura.Sokolov we simplified your approach by the following:

1. Preamble. We feel free to increase any SLRU's, since there's no performance degradation on large Buffers count using your SLRU buckets solution. 2. `slru_buffers_size_scale` is only one config param introduced for all SLRUs. It scales SLRUs upper cap by power 2. 3. All SLRU buffers count are capped by both `MBuffers (shared_buffers)` and `slru_buffers_size_scale`. see 4. Magic initial constants `NUM_*_BUFFERS << slru_buffers_size_scale` are applied for every SLRU. 5. All SLRU buffers are always sized as power of 2, their hash bucket size is always 8.

There's attached patch for your consideration. It does gather and simplify both `v21-0001-Make-all-SLRU-buffer-sizes-configurable.patch` and `v21-0002-Divide-SLRU-buffers-into-8-associative-banks.patch` to much simpler approach.

Thank you, Yours,
- Ivan
Index: src/include/miscadmin.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
--- a/src/include/miscadmin.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/miscadmin.h	(date 1660678108730)
@@ -176,6 +176,7 @@
 extern PGDLLIMPORT int MaxConnections;
 extern PGDLLIMPORT int max_worker_processes;
 extern PGDLLIMPORT int max_parallel_workers;
+extern PGDLLIMPORT int slru_buffers_size_scale;
 
 extern PGDLLIMPORT int MyProcPid;
 extern PGDLLIMPORT pg_time_t MyStartTime;
Index: src/include/access/subtrans.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
--- a/src/include/access/subtrans.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/subtrans.h	(date 1660678108698)
@@ -12,7 +12,7 @@
 #define SUBTRANS_H
 
 /* Number of SLRU buffers to use for subtrans */
-#define NUM_SUBTRANS_BUFFERS	32
+#define NUM_SUBTRANS_BUFFERS	(32 << slru_buffers_size_scale)
 
 extern void SubTransSetParent(TransactionId xid, TransactionId parent);
 extern TransactionId SubTransGetParent(TransactionId xid);
Index: src/backend/utils/init/globals.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
--- a/src/backend/utils/init/globals.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/init/globals.c	(date 1660678064048)
@@ -150,3 +150,5 @@
 
 int			VacuumCostBalance = 0;	/* working state for vacuum */
 bool		VacuumCostActive = false;
+
+int			slru_buffers_size_scale = 2;	/* power 2 scale for SLRU buffers */
Index: src/backend/access/transam/slru.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
--- a/src/backend/access/transam/slru.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/slru.c	(date 1660678063980)
@@ -59,6 +59,7 @@
 #include "pgstat.h"
 #include "storage/fd.h"
 #include "storage/shmem.h"
+#include "port/pg_bitutils.h"
 
 #define SlruFileName(ctl, path, seg) \
 	snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -71,6 +72,17 @@
  */
 #define MAX_WRITEALL_BUFFERS	16
 
+/*
+ * To avoid overflowing internal arithmetic and the size_t data type, the
+ * number of buffers should not exceed this number.
+ */
+#define SLRU_MAX_ALLOWED_BUFFERS ((1024 * 1024 * 1024) / BLCKSZ)
+
+/*
+ * SLRU bank size for slotno hash banks
+ */
+#define SLRU_BANK_SIZE 8
+
 typedef struct SlruWriteAllData
 {
 	int			num_files;		/* # files actually open */
@@ -134,7 +146,7 @@
 static SlruErrorCause slru_errcause;
 static int	slru_errno;
 
-
+static void SlruAdjustNSlots(int *nslots, int *bankmask);
 static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
 static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
 static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
@@ -148,6 +160,25 @@
 									  int segpage, void *data);
 static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
 
+/*
+ * Pick number of slots and bank size optimal for hashed associative SLRU buffers.
+ * We declare SLRU nslots is always power of 2.
+ * We split SLRU to 8-sized hash banks, after some performance benchmarks.
+ * We hash pageno to banks by pageno masked by 3 upper bits.
+ */
+static void
+SlruAdjustNSlots(int *nslots, int *bankmask)
+{
+	Assert(*nslots > 0);
+	Assert(*nslots <= SLRU_MAX_ALLOWED_BUFFERS);
+
+	*nslots = (int) pg_nextpower2_32(Max(SLRU_BANK_SIZE, Min(*nslots, NBuffers / 256)));
+
+	*bankmask = *nslots / SLRU_BANK_SIZE - 1;
+
+	elog(DEBUG5, "nslots %d banksize %d nbanks %d bankmask %x", *nslots, SLRU_BANK_SIZE, *nslots / SLRU_BANK_SIZE, *bankmask);
+}
+
 /*
  * Initialization of shared memory
  */
@@ -156,6 +187,9 @@
 SimpleLruShmemSize(int nslots, int nlsns)
 {
 	Size		sz;
+	int			bankmask_ignore;
+
+	SlruAdjustNSlots(&nslots, &bankmask_ignore);
 
 	/* we assume nslots isn't so large as to risk overflow */
 	sz = MAXALIGN(sizeof(SlruSharedData));
@@ -190,6 +224,9 @@
 {
 	SlruShared	shared;
 	bool		found;
+	int			bankmask;
+
+	SlruAdjustNSlots(&nslots, &bankmask);
 
 	shared = (SlruShared) ShmemInitStruct(name,
 										  SimpleLruShmemSize(nslots, nlsns),
@@ -257,7 +294,10 @@
 		Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
 	}
 	else
+	{
 		Assert(found);
+		Assert(shared->num_slots == nslots);
+	}
 
 	/*
 	 * Initialize the unshared control struct, including directory path. We
@@ -265,6 +305,7 @@
 	 */
 	ctl->shared = shared;
 	ctl->sync_handler = sync_handler;
+	ctl->bank_mask = bankmask;
 	strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
 }
 
@@ -496,12 +537,14 @@
 {
 	SlruShared	shared = ctl->shared;
 	int			slotno;
+	int			bankstart = (pageno & ctl->bank_mask) * SLRU_BANK_SIZE;
+	int			bankend = bankstart + SLRU_BANK_SIZE;
 
 	/* Try to find the page while holding only shared lock */
 	LWLockAcquire(shared->ControlLock, LW_SHARED);
 
 	/* See if page is already in a buffer */
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
+	for (slotno = bankstart; slotno < bankend; slotno++)
 	{
 		if (shared->page_number[slotno] == pageno &&
 			shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
@@ -1030,7 +1073,10 @@
 		int			best_invalid_page_number = 0;	/* keep compiler quiet */
 
 		/* See if page already has a buffer assigned */
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
+		int			bankstart = (pageno & ctl->bank_mask) * SLRU_BANK_SIZE;
+		int			bankend = bankstart + SLRU_BANK_SIZE;
+
+		for (slotno = bankstart; slotno < bankend; slotno++)
 		{
 			if (shared->page_number[slotno] == pageno &&
 				shared->page_status[slotno] != SLRU_PAGE_EMPTY)
@@ -1065,7 +1111,7 @@
 		 * multiple pages with the same lru_count.
 		 */
 		cur_count = (shared->cur_lru_count)++;
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
+		for (slotno = bankstart; slotno < bankend; slotno++)
 		{
 			int			this_delta;
 			int			this_page_number;
Index: src/backend/access/transam/clog.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
--- a/src/backend/access/transam/clog.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/clog.c	(date 1660677867877)
@@ -74,6 +74,8 @@
 #define GetLSNIndex(slotno, xid)	((slotno) * CLOG_LSNS_PER_PAGE + \
 	((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
 
+#define NUM_CLOG_BUFFERS 	(128 << slru_buffers_size_scale)
+
 /*
  * The number of subtransactions below which we consider to apply clog group
  * update optimization.  Testing reveals that the number higher than this can
@@ -661,42 +663,20 @@
 	return status;
 }
 
-/*
- * Number of shared CLOG buffers.
- *
- * On larger multi-processor systems, it is possible to have many CLOG page
- * requests in flight at one time which could lead to disk access for CLOG
- * page if the required page is not found in memory.  Testing revealed that we
- * can get the best performance by having 128 CLOG buffers, more than that it
- * doesn't improve performance.
- *
- * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
- * a good idea, because it would increase the minimum amount of shared memory
- * required to start, which could be a problem for people running very small
- * configurations.  The following formula seems to represent a reasonable
- * compromise: people with very low values for shared_buffers will get fewer
- * CLOG buffers as well, and everyone else will get 128.
- */
-Size
-CLOGShmemBuffers(void)
-{
-	return Min(128, Max(4, NBuffers / 512));
-}
-
 /*
  * Initialization of shared memory for CLOG
  */
 Size
 CLOGShmemSize(void)
 {
-	return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
+	return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);
 }
 
 void
 CLOGShmemInit(void)
 {
 	XactCtl->PagePrecedes = CLOGPagePrecedes;
-	SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
+	SimpleLruInit(XactCtl, "Xact", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE,
 				  XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
 				  SYNC_HANDLER_CLOG);
 	SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
Index: src/include/storage/predicate.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h
--- a/src/include/storage/predicate.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/storage/predicate.h	(date 1660678108718)
@@ -28,7 +28,7 @@
 
 
 /* Number of SLRU buffers to use for Serial SLRU */
-#define NUM_SERIAL_BUFFERS		16
+#define NUM_SERIAL_BUFFERS	(16 << slru_buffers_size_scale)
 
 /*
  * A handle used for sharing SERIALIZABLEXACT objects between the participants
Index: src/backend/utils/misc/postgresql.conf.sample
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
--- a/src/backend/utils/misc/postgresql.conf.sample	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/misc/postgresql.conf.sample	(date 1660678108650)
@@ -155,6 +155,8 @@
 					#   mmap
 					# (change requires restart)
 #min_dynamic_shared_memory = 0MB	# (change requires restart)
+#slru_buffers_size_scale = 2 # SLRU buffers size scale of power 2, range 0..7
+					# (change requires restart)
 
 # - Disk -
 
Index: src/include/access/multixact.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
--- a/src/include/access/multixact.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/multixact.h	(date 1660678108678)
@@ -30,8 +30,8 @@
 #define MaxMultiXactOffset	((MultiXactOffset) 0xFFFFFFFF)
 
 /* Number of SLRU buffers to use for multixact */
-#define NUM_MULTIXACTOFFSET_BUFFERS		8
-#define NUM_MULTIXACTMEMBER_BUFFERS		16
+#define NUM_MULTIXACTOFFSET_BUFFERS		(16 << slru_buffers_size_scale)
+#define NUM_MULTIXACTMEMBER_BUFFERS		(32 << slru_buffers_size_scale)
 
 /*
  * Possible multixact lock modes ("status").  The first four modes are for
Index: src/backend/utils/misc/guc.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
--- a/src/backend/utils/misc/guc.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/utils/misc/guc.c	(date 1660678064216)
@@ -32,9 +32,11 @@
 #endif
 #include <unistd.h>
 
+#include "access/clog.h"
 #include "access/commit_ts.h"
 #include "access/gin.h"
 #include "access/rmgr.h"
+#include "access/slru.h"
 #include "access/tableam.h"
 #include "access/toast_compression.h"
 #include "access/transam.h"
@@ -2375,6 +2377,16 @@
 		-1, -1, INT_MAX,
 		NULL, NULL, NULL
 	},
+
+	{
+		{"slru_buffers_size_scale", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("SLRU buffers size scale of power 2"),
+			NULL
+		},
+		&slru_buffers_size_scale,
+		2, 0, 7,
+		NULL, NULL, NULL
+	},
 
 	{
 		{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
Index: src/include/commands/async.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
--- a/src/include/commands/async.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/commands/async.h	(date 1660678108702)
@@ -18,7 +18,7 @@
 /*
  * The number of SLRU page buffers we use for the notification queue.
  */
-#define NUM_NOTIFY_BUFFERS	8
+#define NUM_NOTIFY_BUFFERS	(16 << slru_buffers_size_scale)
 
 extern bool Trace_notify;
 extern volatile sig_atomic_t notifyInterruptPending;
Index: src/include/access/slru.h
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
--- a/src/include/access/slru.h	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/include/access/slru.h	(date 1660678108690)
@@ -134,6 +134,11 @@
 	 * it's always the same, it doesn't need to be in shared memory.
 	 */
 	char		Dir[64];
+
+	/*
+	 * mask for slotno hash bank
+	 */
+	Size		bank_mask;
 } SlruCtlData;
 
 typedef SlruCtlData *SlruCtl;
Index: src/backend/access/transam/subtrans.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
--- a/src/backend/access/transam/subtrans.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/subtrans.c	(date 1660678064032)
@@ -31,6 +31,7 @@
 #include "access/slru.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
+#include "miscadmin.h"
 #include "pg_trace.h"
 #include "utils/snapmgr.h"
 
Index: doc/src/sgml/config.sgml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
--- a/doc/src/sgml/config.sgml	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/doc/src/sgml/config.sgml	(date 1660677867861)
@@ -1953,6 +1953,37 @@
       </listitem>
      </varlistentry>
 
+    <varlistentry id="guc-slru-buffers-size-scale" xreflabel="slru_buffers_size_scale">
+     <term><varname>slru_buffers_size_scale</varname> (<type>integer</type>)
+     <indexterm>
+      <primary><varname>slru_buffers_size_scale</varname> configuration parameter</primary>
+     </indexterm>
+     </term>
+     <listitem>
+      <para>
+       Specifies power 2 scale for all SLRU shared memory buffers sizes. Buffers sizes depends on
+       both <literal>guc_slru_buffers_size_scale</literal> and <literal>shared_buffers</literal> params.
+      </para>
+      <para>
+       This affects on buffers in the list below (see also <xref linkend="pgdata-contents-table"/>):
+        <itemizedlist>
+         <listitem><para><literal>NUM_MULTIXACTOFFSET_BUFFERS = Min(32 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_MULTIXACTMEMBER_BUFFERS = Min(64 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_SUBTRANS_BUFFERS = Min(64 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_NOTIFY_BUFFERS = Min(32 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_SERIAL_BUFFERS = Min(32 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_CLOG_BUFFERS = Min(128 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+         <listitem><para><literal>NUM_COMMIT_TS_BUFFERS = Min(128 &lt;&lt; slru_buffers_size_scale, shared_buffers/256)</literal></para></listitem>
+        </itemizedlist>
+      </para>
+      <para>
+       Value is in <literal>0..7</literal> bounds.
+       The default value is <literal>2</literal>.
+       This parameter can only be set at server start.
+      </para>
+     </listitem>
+    </varlistentry>
+
      <varlistentry id="guc-max-stack-depth" xreflabel="max_stack_depth">
       <term><varname>max_stack_depth</varname> (<type>integer</type>)
       <indexterm>
Index: src/backend/access/transam/commit_ts.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
--- a/src/backend/access/transam/commit_ts.c	(revision 020258fbd30d37ddd03d0ec68264d1544f8d2838)
+++ b/src/backend/access/transam/commit_ts.c	(date 1660677867889)
@@ -73,6 +73,8 @@
 #define TransactionIdToCTsEntry(xid)	\
 	((xid) % (TransactionId) COMMIT_TS_XACTS_PER_PAGE)
 
+#define NUM_COMMIT_TS_BUFFERS	(128 << slru_buffers_size_scale)
+
 /*
  * Link to shared-memory data structures for CommitTs control
  */
@@ -508,26 +510,13 @@
 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
 }
 
-/*
- * Number of shared CommitTS buffers.
- *
- * We use a very similar logic as for the number of CLOG buffers (except we
- * scale up twice as fast with shared buffers, and the maximum is twice as
- * high); see comments in CLOGShmemBuffers.
- */
-Size
-CommitTsShmemBuffers(void)
-{
-	return Min(256, Max(4, NBuffers / 256));
-}
-
 /*
  * Shared memory sizing for CommitTs
  */
 Size
 CommitTsShmemSize(void)
 {
-	return SimpleLruShmemSize(CommitTsShmemBuffers(), 0) +
+	return SimpleLruShmemSize(NUM_COMMIT_TS_BUFFERS, 0) +
 		sizeof(CommitTimestampShared);
 }
 
@@ -541,7 +530,7 @@
 	bool		found;
 
 	CommitTsCtl->PagePrecedes = CommitTsPagePrecedes;
-	SimpleLruInit(CommitTsCtl, "CommitTs", CommitTsShmemBuffers(), 0,
+	SimpleLruInit(CommitTsCtl, "CommitTs", NUM_COMMIT_TS_BUFFERS, 0,
 				  CommitTsSLRULock, "pg_commit_ts",
 				  LWTRANCHE_COMMITTS_BUFFER,
 				  SYNC_HANDLER_COMMIT_TS);

Reply via email to