On Thu, Mar 25, 2021 at 10:31 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
> We already know that increasing the number of CLOG buffers above the
> current number hurts as the linear search begins to dominate
> (according to the commit message for 5364b357), and it doesn't seem
> great to ship a new feature that melts your CPU when you turn it up.
> Perhaps, to ship this, we need to introduce a buffer mapping table? I
> have attached a "one coffee" attempt at that, on top of your v10 patch
> (unmodified), for discussion. It survives basic testing but I don't
> know how it performs.
Hrrr... Cfbot showed an assertion failure. Here's the two coffee
version with a couple of silly mistakes fixed.
From 4817d16cfb6704d43a7bef12648e753d239c809c Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amboro...@acm.org>
Date: Mon, 15 Feb 2021 21:51:56 +0500
Subject: [PATCH v11 1/2] Make all SLRU buffer sizes configurable
---
doc/src/sgml/config.sgml | 108 ++++++++++++++++++
src/backend/access/transam/clog.c | 6 +
src/backend/access/transam/commit_ts.c | 5 +-
src/backend/access/transam/multixact.c | 8 +-
src/backend/access/transam/subtrans.c | 5 +-
src/backend/commands/async.c | 8 +-
src/backend/storage/lmgr/predicate.c | 4 +-
src/backend/utils/init/globals.c | 8 ++
src/backend/utils/misc/guc.c | 77 +++++++++++++
src/backend/utils/misc/postgresql.conf.sample | 16 +++
src/include/access/multixact.h | 4 -
src/include/access/subtrans.h | 3 -
src/include/commands/async.h | 5 -
src/include/miscadmin.h | 8 ++
src/include/storage/predicate.h | 4 -
15 files changed, 240 insertions(+), 29 deletions(-)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index ddc6d789d8..0adcf0efaf 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1886,6 +1886,114 @@ include_dir 'conf.d'
</para>
</listitem>
</varlistentry>
+
+ <varlistentry id="guc-multixact-offsets-slru-buffers" xreflabel="multixact_offsets_slru_buffers">
+ <term><varname>multixact_offsets_slru_buffers</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>multixact_offsets_slru_buffers</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the amount of memory to be used for MultiXact offsets. MultiXact offsets
+ are used to store information about offsets of multiple row lockers (caused by SELECT FOR UPDATE and others).
+ It defaults to 64 kilobytes (<literal>64KB</literal>).
+ This parameter can only be set at server start.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="guc-multixact-members-slru-buffers" xreflabel="multixact_members_slru_buffers">
+ <term><varname>multixact_members_slru_buffers</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>multixact_members_slru_buffers</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the amount of memory to be used for MultiXact members. MultiXact members
+ are used to store information about XIDs of multiple row lockers. Typically <varname>multixact_members_slru_buffers</varname>
+ is twice as large as <varname>multixact_offsets_slru_buffers</varname>.
+ It defaults to 128 kilobytes (<literal>128KB</literal>).
+ This parameter can only be set at server start.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="guc-subtrans-buffers" xreflabel="subtrans_slru_buffers">
+ <term><varname>subtrans_slru_buffers</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>subtrans_slru_buffers</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the amount of memory to be used for subtransactions.
+ It defaults to 256 kilobytes (<literal>256KB</literal>).
+ This parameter can only be set at server start.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="guc-notify-buffers" xreflabel="notify_slru_buffers">
+ <term><varname>notify_slru_buffers</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>notify_slru_buffers</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the amount of memory to be used for asynchronous notifications (NOTIFY, LISTEN).
+ It defaults to 64 kilobytes (<literal>64KB</literal>).
+ This parameter can only be set at server start.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="guc-serial-buffers" xreflabel="serial_slru_buffers">
+ <term><varname>serial_slru_buffers</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>serial_slru_buffers</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the amount of memory to be used for predicate locks.
+ It defaults to 128 kilobytes (<literal>128KB</literal>).
+ This parameter can only be set at server start.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="guc-clog-buffers" xreflabel="clog_slru_buffers">
+ <term><varname>clog_slru_buffers</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>clog_slru_buffers</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the amount of memory to be used for CLOG.
+ It defaults to 0, in which case the CLOG size is taken as <varname>shared_buffers</varname> / 512.
+ This parameter can only be set at server start.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="guc-commit-ts-buffers" xreflabel="commit_ts_slru_buffers">
+ <term><varname>commit_ts_slru_buffers</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>commit_ts_slru_buffers</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the amount of memory to be used for commit timestamps.
+ It defaults to 0, in which case the commit timestamps buffer size is taken as <varname>shared_buffers</varname> / 512.
+ This parameter can only be set at server start.
+ </para>
+ </listitem>
+ </varlistentry>
<varlistentry id="guc-max-stack-depth" xreflabel="max_stack_depth">
<term><varname>max_stack_depth</varname> (<type>integer</type>)
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 6fa4713fb4..e1d34aa361 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -659,6 +659,9 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
/*
* Number of shared CLOG buffers.
*
+ * If a value is configured via GUC, just use the given value. Otherwise
+ * apply the following heuristics.
+ *
* On larger multi-processor systems, it is possible to have many CLOG page
* requests in flight at one time which could lead to disk access for CLOG
* page if the required page is not found in memory. Testing revealed that we
@@ -675,6 +678,9 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
Size
CLOGShmemBuffers(void)
{
+ /* consider 0 and 1 as unset GUC */
+ if (clog_slru_buffers > 1)
+ return clog_slru_buffers;
return Min(128, Max(4, NBuffers / 512));
}
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 48e8d66286..7de3bca63d 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -530,7 +530,10 @@ pg_xact_commit_timestamp_origin(PG_FUNCTION_ARGS)
Size
CommitTsShmemBuffers(void)
{
- return Min(16, Max(4, NBuffers / 1024));
+ /* consider 0 and 1 as unset GUC */
+ if (commit_ts_slru_buffers > 1)
+ return commit_ts_slru_buffers;
+ return Min(16, Max(4, NBuffers / 512));
}
/*
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 1f9f1a1fa1..370c01e72b 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1831,8 +1831,8 @@ MultiXactShmemSize(void)
mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
size = SHARED_MULTIXACT_STATE_SIZE;
- size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTOFFSET_BUFFERS, 0));
- size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTMEMBER_BUFFERS, 0));
+ size = add_size(size, SimpleLruShmemSize(multixact_offsets_slru_buffers, 0));
+ size = add_size(size, SimpleLruShmemSize(multixact_members_slru_buffers, 0));
return size;
}
@@ -1848,13 +1848,13 @@ MultiXactShmemInit(void)
MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
SimpleLruInit(MultiXactOffsetCtl,
- "MultiXactOffset", NUM_MULTIXACTOFFSET_BUFFERS, 0,
+ "MultiXactOffset", multixact_offsets_slru_buffers, 0,
MultiXactOffsetSLRULock, "pg_multixact/offsets",
LWTRANCHE_MULTIXACTOFFSET_BUFFER,
SYNC_HANDLER_MULTIXACT_OFFSET);
SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
SimpleLruInit(MultiXactMemberCtl,
- "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0,
+ "MultiXactMember", multixact_members_slru_buffers, 0,
MultiXactMemberSLRULock, "pg_multixact/members",
LWTRANCHE_MULTIXACTMEMBER_BUFFER,
SYNC_HANDLER_MULTIXACT_MEMBER);
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 6a8e521f89..0c24353d3a 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -31,6 +31,7 @@
#include "access/slru.h"
#include "access/subtrans.h"
#include "access/transam.h"
+#include "miscadmin.h"
#include "pg_trace.h"
#include "utils/snapmgr.h"
@@ -184,14 +185,14 @@ SubTransGetTopmostTransaction(TransactionId xid)
Size
SUBTRANSShmemSize(void)
{
- return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
+ return SimpleLruShmemSize(subtrans_slru_buffers, 0);
}
void
SUBTRANSShmemInit(void)
{
SubTransCtl->PagePrecedes = SubTransPagePrecedes;
- SimpleLruInit(SubTransCtl, "Subtrans", NUM_SUBTRANS_BUFFERS, 0,
+ SimpleLruInit(SubTransCtl, "Subtrans", subtrans_slru_buffers, 0,
SubtransSLRULock, "pg_subtrans",
LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE);
SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 4b16fb5682..f5c5592057 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -107,7 +107,7 @@
* frontend during startup.) The above design guarantees that notifies from
* other backends will never be missed by ignoring self-notifies.
*
- * The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS)
+ * The amount of shared memory used for notify management (notify_slru_buffers)
* can be varied without affecting anything but performance. The maximum
* amount of notification data that can be queued at one time is determined
* by slru.c's wraparound limit; see QUEUE_MAX_PAGE below.
@@ -225,7 +225,7 @@ typedef struct QueuePosition
*
* Resist the temptation to make this really large. While that would save
* work in some places, it would add cost in others. In particular, this
- * should likely be less than NUM_NOTIFY_BUFFERS, to ensure that backends
+ * should likely be less than notify_slru_buffers, to ensure that backends
* catch up before the pages they'll need to read fall out of SLRU cache.
*/
#define QUEUE_CLEANUP_DELAY 4
@@ -514,7 +514,7 @@ AsyncShmemSize(void)
size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
size = add_size(size, offsetof(AsyncQueueControl, backend));
- size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
+ size = add_size(size, SimpleLruShmemSize(notify_slru_buffers, 0));
return size;
}
@@ -562,7 +562,7 @@ AsyncShmemInit(void)
* Set up SLRU management of the pg_notify data.
*/
NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
- SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
+ SimpleLruInit(NotifyCtl, "Notify", notify_slru_buffers, 0,
NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
SYNC_HANDLER_NONE);
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index d493aeef0f..fad8cc572e 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -872,7 +872,7 @@ SerialInit(void)
*/
SerialSlruCtl->PagePrecedes = SerialPagePrecedesLogically;
SimpleLruInit(SerialSlruCtl, "Serial",
- NUM_SERIAL_BUFFERS, 0, SerialSLRULock, "pg_serial",
+ serial_slru_buffers, 0, SerialSLRULock, "pg_serial",
LWTRANCHE_SERIAL_BUFFER, SYNC_HANDLER_NONE);
#ifdef USE_ASSERT_CHECKING
SerialPagePrecedesLogicallyUnitTests();
@@ -1395,7 +1395,7 @@ PredicateLockShmemSize(void)
/* Shared memory structures for SLRU tracking of old committed xids. */
size = add_size(size, sizeof(SerialControlData));
- size = add_size(size, SimpleLruShmemSize(NUM_SERIAL_BUFFERS, 0));
+ size = add_size(size, SimpleLruShmemSize(serial_slru_buffers, 0));
return size;
}
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 73e0a672ae..f163ca17e9 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -148,3 +148,11 @@ int64 VacuumPageDirty = 0;
int VacuumCostBalance = 0; /* working state for vacuum */
bool VacuumCostActive = false;
+
+int multixact_offsets_slru_buffers = 8;
+int multixact_members_slru_buffers = 16;
+int subtrans_slru_buffers = 32;
+int notify_slru_buffers = 8;
+int serial_slru_buffers = 16;
+int clog_slru_buffers = 0;
+int commit_ts_slru_buffers = 0;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 0c5dc4d3e8..b65a4ae9ce 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2305,6 +2305,83 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"multixact_offsets_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Sets the number of shared memory buffers used for MultiXact offsets SLRU."),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ &multixact_offsets_slru_buffers,
+ 8, 2, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
+ {
+ {"multixact_members_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Sets the number of shared memory buffers used for MultiXact members SLRU."),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ &multixact_members_slru_buffers,
+ 16, 2, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
+ {
+ {"subtrans_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Sets the number of shared memory buffers used for subtransactions SLRU."),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ &subtrans_slru_buffers,
+ 32, 2, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
+ {
+ {"notify_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Sets the number of shared memory buffers used for asynchronous notifications SLRU."),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ &notify_slru_buffers,
+ 8, 2, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
+ {
+ {"serial_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Sets the number of shared memory buffers used for predicate locks SLRU."),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ &serial_slru_buffers,
+ 16, 2, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
+ {
+ {"clog_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Sets the number of shared memory buffers used for commit log SLRU."),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ &clog_slru_buffers,
+ 0, 0, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
+ {
+ {"commit_ts_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Sets the number of shared memory buffers used for commit timestamps SLRU."),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ &commit_ts_slru_buffers,
+ 0, 0, INT_MAX / 2,
+ NULL, NULL, NULL
+ },
+
{
{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
gettext_noop("Sets the maximum number of temporary buffers used by each session."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b234a6bfe6..308fd565d3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -190,6 +190,22 @@
# (change requires restart)
#backend_flush_after = 0 # measured in pages, 0 disables
+# - SLRU Buffers -
+
+#multixact_offsets_slru_buffers = 8 # memory used for MultiXact offsets
+ # (change requires restart)
+#multixact_members_slru_buffers = 16 # memory used for MultiXact members
+ # (change requires restart)
+#subtrans_slru_buffers = 32 # memory used for subtransactions
+ # (change requires restart)
+#notify_slru_buffers = 8 # memory used for asynchronous notifications
+ # (change requires restart)
+#serial_slru_buffers = 16 # memory used for predicate locks
+ # (change requires restart)
+#clog_slru_buffers = 0 # memory used for CLOG
+ # (change requires restart)
+#commit_ts_slru_buffers = 0 # memory used for commit timestamps
+ # (change requires restart)
#------------------------------------------------------------------------------
# WRITE-AHEAD LOG
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 4bbb035eae..97c0a46376 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -29,10 +29,6 @@
#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
-/* Number of SLRU buffers to use for multixact */
-#define NUM_MULTIXACTOFFSET_BUFFERS 8
-#define NUM_MULTIXACTMEMBER_BUFFERS 16
-
/*
* Possible multixact lock modes ("status"). The first four modes are for
* tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
index d0ab44ae82..ca0999056e 100644
--- a/src/include/access/subtrans.h
+++ b/src/include/access/subtrans.h
@@ -11,9 +11,6 @@
#ifndef SUBTRANS_H
#define SUBTRANS_H
-/* Number of SLRU buffers to use for subtrans */
-#define NUM_SUBTRANS_BUFFERS 32
-
extern void SubTransSetParent(TransactionId xid, TransactionId parent);
extern TransactionId SubTransGetParent(TransactionId xid);
extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
index 9217f66b91..fa831e3721 100644
--- a/src/include/commands/async.h
+++ b/src/include/commands/async.h
@@ -15,11 +15,6 @@
#include <signal.h>
-/*
- * The number of SLRU page buffers we use for the notification queue.
- */
-#define NUM_NOTIFY_BUFFERS 8
-
extern bool Trace_notify;
extern volatile sig_atomic_t notifyInterruptPending;
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 013850ac28..3d9f585fb9 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -162,6 +162,14 @@ extern PGDLLIMPORT int MaxBackends;
extern PGDLLIMPORT int MaxConnections;
extern PGDLLIMPORT int max_worker_processes;
extern PGDLLIMPORT int max_parallel_workers;
+extern PGDLLIMPORT int multixact_offsets_slru_buffers;
+extern PGDLLIMPORT int multixact_members_slru_buffers;
+extern PGDLLIMPORT int multixact_members_slru_buffers;
+extern PGDLLIMPORT int subtrans_slru_buffers;
+extern PGDLLIMPORT int notify_slru_buffers;
+extern PGDLLIMPORT int serial_slru_buffers;
+extern PGDLLIMPORT int clog_slru_buffers;
+extern PGDLLIMPORT int commit_ts_slru_buffers;
extern PGDLLIMPORT int MyProcPid;
extern PGDLLIMPORT pg_time_t MyStartTime;
diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h
index 152b698611..c72779bd88 100644
--- a/src/include/storage/predicate.h
+++ b/src/include/storage/predicate.h
@@ -26,10 +26,6 @@ extern int max_predicate_locks_per_xact;
extern int max_predicate_locks_per_relation;
extern int max_predicate_locks_per_page;
-
-/* Number of SLRU buffers to use for Serial SLRU */
-#define NUM_SERIAL_BUFFERS 16
-
/*
* A handle used for sharing SERIALIZABLEXACT objects between the participants
* in a parallel query.
--
2.30.1
From 65600b53939c34abf43e62f3f59be5671c43d301 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 25 Mar 2021 10:11:31 +1300
Subject: [PATCH v11 2/2] Add buffer mapping table for SLRUs.
---
src/backend/access/transam/slru.c | 87 ++++++++++++++++++++++++++++---
src/include/access/slru.h | 2 +
2 files changed, 83 insertions(+), 6 deletions(-)
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 82149ad782..487585bb60 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -58,6 +58,8 @@
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/shmem.h"
+#include "utils/dynahash.h"
+#include "utils/hsearch.h"
#define SlruFileName(ctl, path, seg) \
snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -79,6 +81,12 @@ typedef struct SlruWriteAllData
typedef struct SlruWriteAllData *SlruWriteAll;
+typedef struct SlruMappingTableEntry
+{
+ int pageno;
+ int slotno;
+} SlruMappingTableEntry;
+
/*
* Populate a file tag describing a segment file. We only use the segment
* number, since we can derive everything else we need by having separate
@@ -146,6 +154,9 @@ static int SlruSelectLRUPage(SlruCtl ctl, int pageno);
static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
int segpage, void *data);
static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
+static void SlruMappingAdd(SlruCtl ctl, int pageno, int slotno);
+static void SlruMappingRemove(SlruCtl ctl, int pageno);
+static int SlruMappingFind(SlruCtl ctl, int pageno);
/*
* Initialization of shared memory
@@ -168,7 +179,8 @@ SimpleLruShmemSize(int nslots, int nlsns)
if (nlsns > 0)
sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
- return BUFFERALIGN(sz) + BLCKSZ * nslots;
+ return BUFFERALIGN(sz) + BLCKSZ * nslots +
+ hash_estimate_size(nslots, sizeof(SlruMappingTableEntry));
}
/*
@@ -187,6 +199,9 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
LWLock *ctllock, const char *subdir, int tranche_id,
SyncRequestHandler sync_handler)
{
+ char mapping_table_name[SHMEM_INDEX_KEYSIZE];
+ HASHCTL mapping_table_info;
+ HTAB *mapping_table;
SlruShared shared;
bool found;
@@ -258,11 +273,21 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
else
Assert(found);
+ /* Create or find the buffer mapping table. */
+ memset(&mapping_table_info, 0, sizeof(mapping_table_info));
+ mapping_table_info.keysize = sizeof(int);
+ mapping_table_info.entrysize = sizeof(SlruMappingTableEntry);
+ snprintf(mapping_table_name, sizeof(mapping_table_name),
+ "%s Mapping Table", name);
+ mapping_table = ShmemInitHash(mapping_table_name, nslots, nslots,
+ &mapping_table_info, HASH_ELEM | HASH_BLOBS);
+
/*
* Initialize the unshared control struct, including directory path. We
* assume caller set PagePrecedes.
*/
ctl->shared = shared;
+ ctl->mapping_table = mapping_table;
ctl->sync_handler = sync_handler;
strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
}
@@ -289,6 +314,9 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno)
shared->page_number[slotno] == pageno);
/* Mark the slot as containing this page */
+ if (shared->page_status[slotno] != SLRU_PAGE_EMPTY)
+ SlruMappingRemove(ctl, shared->page_number[slotno]);
+ SlruMappingAdd(ctl, pageno, slotno);
shared->page_number[slotno] = pageno;
shared->page_status[slotno] = SLRU_PAGE_VALID;
shared->page_dirty[slotno] = true;
@@ -362,7 +390,10 @@ SimpleLruWaitIO(SlruCtl ctl, int slotno)
{
/* indeed, the I/O must have failed */
if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
+ {
+ SlruMappingRemove(ctl, shared->page_number[slotno]);
shared->page_status[slotno] = SLRU_PAGE_EMPTY;
+ }
else /* write_in_progress */
{
shared->page_status[slotno] = SLRU_PAGE_VALID;
@@ -436,6 +467,10 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
!shared->page_dirty[slotno]));
/* Mark the slot read-busy */
+ /* If the slot still holds another page, drop that page's mapping first. */
+ if (shared->page_status[slotno] != SLRU_PAGE_EMPTY)
+ SlruMappingRemove(ctl, shared->page_number[slotno]);
+ SlruMappingAdd(ctl, pageno, slotno);
shared->page_number[slotno] = pageno;
shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
shared->page_dirty[slotno] = false;
@@ -459,7 +491,13 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
!shared->page_dirty[slotno]);
- shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
+ if (ok)
+ shared->page_status[slotno] = SLRU_PAGE_VALID;
+ else
+ {
+ SlruMappingRemove(ctl, pageno);
+ shared->page_status[slotno] = SLRU_PAGE_EMPTY;
+ }
LWLockRelease(&shared->buffer_locks[slotno].lock);
@@ -1029,11 +1067,12 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
int best_invalid_page_number = 0; /* keep compiler quiet */
/* See if page already has a buffer assigned */
- for (slotno = 0; slotno < shared->num_slots; slotno++)
+ slotno = SlruMappingFind(ctl, pageno);
+ if (slotno >= 0)
{
- if (shared->page_number[slotno] == pageno &&
- shared->page_status[slotno] != SLRU_PAGE_EMPTY)
- return slotno;
+ Assert(shared->page_number[slotno] == pageno);
+ Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
+ return slotno;
}
/*
@@ -1266,6 +1305,7 @@ restart:;
if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
!shared->page_dirty[slotno])
{
+ SlruMappingRemove(ctl, shared->page_number[slotno]);
shared->page_status[slotno] = SLRU_PAGE_EMPTY;
continue;
}
@@ -1348,6 +1388,7 @@ restart:
if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
!shared->page_dirty[slotno])
{
+ SlruMappingRemove(ctl, shared->page_number[slotno]);
shared->page_status[slotno] = SLRU_PAGE_EMPTY;
continue;
}
@@ -1609,3 +1650,37 @@ SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
errno = save_errno;
return result;
}
+
+static int
+SlruMappingFind(SlruCtl ctl, int pageno)
+{
+ SlruMappingTableEntry *mapping;
+
+ mapping = hash_search(ctl->mapping_table, &pageno, HASH_FIND, NULL);
+ if (mapping)
+ return mapping->slotno;
+
+ return -1;
+}
+
+static void
+SlruMappingAdd(SlruCtl ctl, int pageno, int slotno)
+{
+ SlruMappingTableEntry *mapping;
+ bool found PG_USED_FOR_ASSERTS_ONLY;
+
+ mapping = hash_search(ctl->mapping_table, &pageno, HASH_ENTER, &found);
+ mapping->slotno = slotno;
+
+ Assert(!found);
+}
+
+static void
+SlruMappingRemove(SlruCtl ctl, int pageno)
+{
+ bool found PG_USED_FOR_ASSERTS_ONLY;
+
+ hash_search(ctl->mapping_table, &pageno, HASH_REMOVE, &found);
+
+ Assert(found);
+}
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index dd52e8cec7..8aa3efc0ee 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -16,6 +16,7 @@
#include "access/xlogdefs.h"
#include "storage/lwlock.h"
#include "storage/sync.h"
+#include "utils/hsearch.h"
/*
@@ -110,6 +111,7 @@ typedef SlruSharedData *SlruShared;
typedef struct SlruCtlData
{
SlruShared shared;
+ HTAB *mapping_table;
/*
* Which sync handler function to use when handing sync requests over to
--
2.30.1