On Fri, May 17, 2024 at 4:46 PM Thomas Munro <thomas.mu...@gmail.com> wrote:
> The specific problem here is that LocalProcessControlFile() runs in
> every launched child for EXEC_BACKEND builds.  Windows uses
> EXEC_BACKEND, and Windows' NTFS file system is one of the two file
> systems known to this list to have the concurrent read/write data
> mashing problem (the other being ext4).

Phngh... this is surprisingly difficult to fix.

Things that don't work: We "just" need to acquire ControlFileLock
while reading the file or examining the object in shared memory, or
get a copy of it, passed through the EXEC_BACKEND BackendParameters
that was acquired while holding the lock, but the current location of
this code in child startup is too early to use LWLocks, and the
postmaster can't acquire locks either so it can't even safely take a
copy to pass on.  You could reorder startup so that we are allowed to
acquire LWLocks in children at that point, but then you'd need to
convince yourself that there is no danger of breaking some ordering
requirement in external preload libraries, and figure out what to do
about children that don't even attach to shared memory.  Maybe that's
possible, but that doesn't sound like a good idea to back-patch.

First idea idea I've come up with to avoid all of that: pass a copy of
the "proto-controlfile", to coin a term for the one read early in
postmaster startup by LocalProcessControlFile().  As far as I know,
the only reason we need it is to suck some settings out of it that
don't change while a cluster is running (mostly can't change after
initdb, and checksums can only be {en,dis}abled while down).  Right?
Children can just "import" that sucker instead of calling
LocalProcessControlFile() to figure out the size of WAL segments yada
yada, I think?  Later they will attach to the real one in shared
memory for all future purposes, once normal interlocking is allowed.

I dunno.  Draft patch attached.  Better plans welcome.  This passes CI
on Linux systems afflicted by EXEC_BACKEND, and Windows.  Thoughts?
From 48c2de14bd9368b4708a99ecbb75452dc327e608 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 18 May 2024 13:41:09 +1200
Subject: [PATCH v1 1/3] Fix pg_control corruption in EXEC_BACKEND startup.

When backend processes were launched in EXEC_BACKEND builds, they would
run LocalProcessControlFile() to read in pg_control and extract several
important settings.

This happens too early to acquire ControlFileLock, and the postmaster is
also not allowed to acquire ControlFileLock, so it can't safely take a
copy to give to the child.

Instead, pass down the "proto-controlfile" that was read by the
postmaster in LocalProcessControlFile().  Introduce functions
ExportProtoControlFile() and ImportProtoControlFile() to allow that.
Subprocesses will extract information from that, and then later attach
to the current control file in shared memory.

Reported-by: Melanie Plageman <melanieplage...@gmail.com> per Windows CI failure
Discussion: https://postgr.es/m/CAAKRu_YNGwEYrorQYza_W8tU%2B%3DtoXRHG8HpyHC-KDbZqA_ZVSA%40mail.gmail.com
---
 src/backend/access/transam/xlog.c       | 46 +++++++++++++++++++++++--
 src/backend/postmaster/launch_backend.c | 19 ++++++----
 src/include/access/xlog.h               |  5 +++
 3 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 330e058c5f2..b69a0d95af9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -568,6 +568,10 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
  */
 static ControlFileData *ControlFile = NULL;
 
+#ifdef EXEC_BACKEND
+static ControlFileData *ProtoControlFile = NULL;
+#endif
+
 /*
  * Calculate the amount of space left on the page after 'endptr'. Beware
  * multiple evaluation!
@@ -686,6 +690,7 @@ static bool PerformRecoveryXLogAction(void);
 static void InitControlFile(uint64 sysidentifier);
 static void WriteControlFile(void);
 static void ReadControlFile(void);
+static void ScanControlFile(void);
 static void UpdateControlFile(void);
 static char *str_time(pg_time_t tnow);
 
@@ -4309,9 +4314,7 @@ WriteControlFile(void)
 static void
 ReadControlFile(void)
 {
-	pg_crc32c	crc;
 	int			fd;
-	static char wal_segsz_str[20];
 	int			r;
 
 	/*
@@ -4344,6 +4347,15 @@ ReadControlFile(void)
 
 	close(fd);
 
+	ScanControlFile();
+}
+
+static void
+ScanControlFile(void)
+{
+	static char wal_segsz_str[20];
+	pg_crc32c	crc;
+
 	/*
 	 * Check for expected pg_control format version.  If this is wrong, the
 	 * CRC check will likely fail because we'll be checking the wrong number
@@ -4815,8 +4827,33 @@ LocalProcessControlFile(bool reset)
 	Assert(reset || ControlFile == NULL);
 	ControlFile = palloc(sizeof(ControlFileData));
 	ReadControlFile();
+
+#ifdef EXEC_BACKEND
+	/* We need to be able to give this to subprocesses. */
+	ProtoControlFile = ControlFile;
+#endif
+}
+
+#ifdef EXEC_BACKEND
+void
+ExportProtoControlFile(ControlFileData *copy)
+{
+	*copy = *ProtoControlFile;
 }
 
+/*
+ * Like LocalProcessControlFile(), but used early in EXEC_BACKEND children's
+ * startup.  This receives the same file that the postmaster first read.
+ */
+void
+ImportProtoControlFile(const ControlFileData *copy)
+{
+	ControlFile = palloc(sizeof(ControlFileData));
+	*ControlFile = *copy;
+	ScanControlFile();
+}
+#endif
+
 /*
  * Get the wal_level from the control file. For a standby, this value should be
  * considered as its active wal_level, because it may be different from what
@@ -4935,7 +4972,12 @@ XLOGShmemInit(void)
 	if (localControlFile)
 	{
 		memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
+#ifdef EXEC_BACKEND
+		/* We still hold a reference to give to subprocesses. */
+		Assert(ProtoControlFile == localControlFile);
+#else
 		pfree(localControlFile);
+#endif
 	}
 
 	/*
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index bdfa238e4fe..40efe738ae1 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -34,6 +34,7 @@
 #include <unistd.h>
 
 #include "access/xlog.h"
+#include "catalog/pg_control.h"
 #include "common/file_utils.h"
 #include "libpq/libpq-be.h"
 #include "libpq/pqsignal.h"
@@ -135,6 +136,14 @@ typedef struct
 	char		my_exec_path[MAXPGPATH];
 	char		pkglib_path[MAXPGPATH];
 
+	/*
+	 * A copy of the ControlFileData from early in Postmaster startup.  We
+	 * need to access its contents it at a phase of initialization before we
+	 * are allowed to acquire LWLocks, so we can't just use shared memory or
+	 * read the file from disk.
+	 */
+	ControlFileData proto_controlfile;
+
 	/*
 	 * These are only used by backend processes, but are here because passing
 	 * a socket needs some special handling on Windows. 'client_sock' is an
@@ -643,12 +652,6 @@ SubPostmasterMain(int argc, char *argv[])
 	 */
 	checkDataDir();
 
-	/*
-	 * (re-)read control file, as it contains config. The postmaster will
-	 * already have read this, but this process doesn't know about that.
-	 */
-	LocalProcessControlFile(false);
-
 	/*
 	 * Reload any libraries that were preloaded by the postmaster.  Since we
 	 * exec'd this process, those libraries didn't come along with us; but we
@@ -746,6 +749,8 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
 
 	param->MaxBackends = MaxBackends;
 
+	ExportProtoControlFile(&param->proto_controlfile);
+
 #ifdef WIN32
 	param->PostmasterHandle = PostmasterHandle;
 	if (!write_duplicated_handle(&param->initial_signal_pipe,
@@ -1018,6 +1023,8 @@ restore_backend_variables(BackendParameters *param)
 
 	strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);
 
+	ImportProtoControlFile(&param->proto_controlfile);
+
 	/*
 	 * We need to restore fd.c's counts of externally-opened FDs; to avoid
 	 * confusion, be sure to do this after restoring max_safe_fds.  (Note:
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 1a1f11a943f..853f3527ef1 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -195,6 +195,7 @@ typedef enum WALAvailability
 
 struct XLogRecData;
 struct XLogReaderState;
+struct ControlFileData;
 
 extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
 								   XLogRecPtr fpw_lsn,
@@ -234,6 +235,10 @@ extern void XLOGShmemInit(void);
 extern void BootStrapXLOG(void);
 extern void InitializeWalConsistencyChecking(void);
 extern void LocalProcessControlFile(bool reset);
+#ifdef EXEC_BACKEND
+extern void ExportProtoControlFile(struct ControlFileData *copy);
+extern void ImportProtoControlFile(const struct ControlFileData *copy);
+#endif
 extern WalLevel GetActiveWalLevelOnStandby(void);
 extern void StartupXLOG(void);
 extern void ShutdownXLOG(int code, Datum arg);
-- 
2.39.2

Reply via email to