From 031ba7fa3fa48306efd4924309c23646b9042840 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou.vg@hco.ntt.co.jp>
Date: Thu, 11 Mar 2021 17:55:53 +0900
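Subject: [PATCH v4 2/9] Add wal_pmem_map to GUC

Add a boolean GUC, wal_pmem_map (PGC_POSTMASTER, default false), which
is registered only when built with USE_LIBPMEM. When it is true, WAL
segment files are to be memory-mapped and used as WAL buffers, so:

- wal_buffers is overridden with wal_segment_size / XLOG_BLCKSZ, and
- the volatile WAL page buffers are neither counted in XLOGShmemSize()
  nor set up in XLOGShmemInit().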

---
 src/backend/access/transam/xlog.c | 51 ++++++++++++++++++++++++-------
 src/backend/utils/misc/guc.c      | 14 +++++++++
 src/include/access/xlog.h         |  1 +
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f547efd294..8a530f88c0 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -112,6 +112,7 @@ int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
 int			wal_retrieve_retry_interval = 5000;
 int			max_slot_wal_keep_size_mb = -1;
 bool		track_wal_io_timing = false;
+bool		wal_pmem_map = false;
 
 #ifdef WAL_DEBUG
 bool		XLOG_DEBUG = false;
@@ -5131,13 +5132,28 @@ XLOGShmemSize(void)
 {
 	Size		size;
 
+	/*
+	 * If we use WAL segment files as WAL buffers, the given value of
+	 * wal_buffers is ignored. Instead, it is overridden with the number of
+	 * pages in one segment (wal_segment_size / XLOG_BLCKSZ). This must be
+	 * done before calculating the size of the xlblocks array.
+	 */
+	if (wal_pmem_map)
+	{
+		int			npages;
+		char		buf[32];
+
+		npages = wal_segment_size / XLOG_BLCKSZ;
+		snprintf(buf, sizeof(buf), "%d", (int) npages);
+		SetConfigOption("wal_buffers", buf, PGC_POSTMASTER, PGC_S_OVERRIDE);
+	}
 	/*
 	 * If the value of wal_buffers is -1, use the preferred auto-tune value.
 	 * This isn't an amazingly clean place to do this, but we must wait till
 	 * NBuffers has received its final value, and must do it before using the
 	 * value of XLOGbuffers to do anything important.
 	 */
-	if (XLOGbuffers == -1)
+	else if (XLOGbuffers == -1)
 	{
 		char		buf[32];
 
@@ -5153,10 +5169,17 @@ XLOGShmemSize(void)
 	size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
 	/* xlblocks array */
 	size = add_size(size, mul_size(sizeof(XLogRecPtr), XLOGbuffers));
-	/* extra alignment padding for XLOG I/O buffers */
-	size = add_size(size, XLOG_BLCKSZ);
-	/* and the buffers themselves */
-	size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
+
+	/*
+	 * If we use WAL segment files as WAL buffers, we don't need volatile ones.
+	 */
+	if (!wal_pmem_map)
+	{
+		/* extra alignment padding for XLOG I/O buffers */
+		size = add_size(size, XLOG_BLCKSZ);
+		/* and the buffers themselves */
+		size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
+	}
 
 	/*
 	 * Note: we don't count ControlFileData, it comes out of the "slop factor"
@@ -5250,13 +5273,19 @@ XLOGShmemInit(void)
 	}
 
 	/*
-	 * Align the start of the page buffers to a full xlog block size boundary.
-	 * This simplifies some calculations in XLOG insertion. It is also
-	 * required for O_DIRECT.
+	 * If we use WAL segment files as WAL buffers, we don't need volatile ones.
 	 */
-	allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
-	XLogCtl->pages = allocptr;
-	memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
+	if (!wal_pmem_map)
+	{
+		/*
+		 * Align the start of the page buffers to a full xlog block size boundary.
+		 * This simplifies some calculations in XLOG insertion. It is also
+		 * required for O_DIRECT.
+		 */
+		allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
+		XLogCtl->pages = allocptr;
+		memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
+	}
 
 	/*
 	 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e91d5a3cfd..05c9d260d8 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1343,6 +1343,20 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+#ifdef USE_LIBPMEM
+	{
+		{"wal_pmem_map", PGC_POSTMASTER, WAL_SETTINGS,
+			gettext_noop("Map WAL segment files on PMEM as WAL buffers."),
+			gettext_noop("If true, postgres will memory-map WAL segment files "
+						 "on PMEM to use them as WAL buffers instead of the "
+						 "traditional volatile ones."),
+		},
+		&wal_pmem_map,
+		false,
+		NULL, NULL, NULL
+	},
+#endif
+
 	{
 		{"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
 			gettext_noop("Logs each checkpoint."),
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 5e2c94a05f..9cc2e1f458 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -90,6 +90,7 @@ extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
 extern bool wal_receiver_create_temp_slot;
 extern bool track_wal_io_timing;
+extern bool wal_pmem_map;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
-- 
2.25.1

