On Sat, Apr 2, 2022 at 10:03 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
> Another idea would be to call a new function DropPendingWritebacks(),
> and also tell all the SMgrRelation objects to close all their internal
> state (ie the fds + per-segment objects) but not free the main
> SMgrRelationData object, and for good measure also invalidate the
> small amount of cached state (which hasn't been mentioned in this
> thread, but it kinda bothers me that that state currently survives, so
> it was one unspoken reason I liked the smgrcloseall() idea).  Since
> DropPendingWritebacks() is in a rather different module, perhaps if we
> were to do that we'd want to rename PROCSIGNAL_BARRIER_SMGRRELEASE to
> something else, because it's not really a SMGR-only thing anymore.

Here's a sketch of that idea.
From 58262253e167e25d4bbb6777574a98fb12850c29 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 2 Apr 2022 17:05:39 +1300
Subject: [PATCH 1/2] WIP: Rethink PROCSIGNAL_BARRIER_SMGRRELEASE.

With sufficiently bad luck, it was possible for IssuePendingWritebacks()
to reopen relation files just after we'd processed
PROCSIGNAL_BARRIER_SMGRRELEASE but before the file was unlinked by some
other backend.

While commit 4eb21763 initially appeared to have fixed the spate of test
failures we'd seen on Windows, testing with extra instrumentation to
look out for writes to unlinked files revealed the dangerous nature of
these deferred file references.

This second attempt introduces smgrrelease(reln), smgrreleaseall(), and
smgrreleaseallcount().  That last is used by IssuedPendingWritebacks()
to notice when pending writebacks have to be consider potentially
invalid, by watching out for level changes during the time writeback
instructions were accumulated.

XXX Experimental

Reported-by: Andres Freund <and...@anarazel.de>
Discussion: https://postgr.es/m/20220209220004.kb3dgtn2x2k2gtdm%40alap3.anarazel.de
---
 src/backend/storage/buffer/bufmgr.c | 14 +++++++
 src/backend/storage/smgr/md.c       |  6 ---
 src/backend/storage/smgr/smgr.c     | 58 +++++++++++++++++++++++++----
 src/include/storage/buf_internals.h |  3 ++
 src/include/storage/md.h            |  1 -
 src/include/storage/smgr.h          |  3 ++
 6 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index d73a40c1bc..43652b757a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4873,6 +4873,7 @@ WritebackContextInit(WritebackContext *context, int *max_pending)
 
 	context->max_pending = max_pending;
 	context->nr_pending = 0;
+	context->smgr_release_count = smgrreleaseallcount();
 }
 
 /*
@@ -4924,6 +4925,18 @@ IssuePendingWritebacks(WritebackContext *context)
 {
 	int			i;
 
+	/*
+	 * Throw away the contents if smgrreleaseall() has been invoked since
+	 * "context" was initialized or cleared.  Since the main loop below doesn't
+	 * check for interrupts or otherwise handle barrier requests, we don't have
+	 * to worry about invalidation if we get past this check.
+	 */
+	if (context->smgr_release_count != smgrreleaseallcount())
+	{
+		context->nr_pending = 0;
+		context->smgr_release_count = smgrreleaseallcount();
+	}
+
 	if (context->nr_pending == 0)
 		return;
 
@@ -4983,6 +4996,7 @@ IssuePendingWritebacks(WritebackContext *context)
 	}
 
 	context->nr_pending = 0;
+	context->smgr_release_count = smgrreleaseallcount();
 }
 
 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 879f647dbc..d26c915f90 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -549,12 +549,6 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
 	}
 }
 
-void
-mdrelease(void)
-{
-	closeAllVfds();
-}
-
 /*
  *	mdprefetch() -- Initiate asynchronous read of the specified block of a relation
  */
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index d71a557a35..087c8e76d1 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -41,7 +41,6 @@ typedef struct f_smgr
 {
 	void		(*smgr_init) (void);	/* may be NULL */
 	void		(*smgr_shutdown) (void);	/* may be NULL */
-	void		(*smgr_release) (void); /* may be NULL */
 	void		(*smgr_open) (SMgrRelation reln);
 	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
 	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
@@ -70,7 +69,6 @@ static const f_smgr smgrsw[] = {
 	{
 		.smgr_init = mdinit,
 		.smgr_shutdown = NULL,
-		.smgr_release = mdrelease,
 		.smgr_open = mdopen,
 		.smgr_close = mdclose,
 		.smgr_create = mdcreate,
@@ -100,6 +98,8 @@ static dlist_head unowned_relns;
 /* local function prototypes */
 static void smgrshutdown(int code, Datum arg);
 
+static uint64 releaseall_count = 0;
+
 
 /*
  *	smgrinit(), smgrshutdown() -- Initialize or shut down storage
@@ -281,6 +281,44 @@ smgrclose(SMgrRelation reln)
 		*owner = NULL;
 }
 
+/*
+ *	smgrrelease() -- Release all resources used by this object.
+ *
+ *	The object remains valid.
+ */
+void
+smgrrelease(SMgrRelation reln)
+{
+	for (int i = 0; i <= MAX_FORKNUM; ++i)
+	{
+		smgrsw[reln->smgr_which].smgr_close(reln, i);
+		reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
+	}
+}
+
+/*
+ *	smgrreleaseall() -- Release resources used by all objects.
+ *
+ *	This is called for PROCSIGNAL_BARRIER_SMGRRELEASE.
+ */
+void
+smgrreleaseall(void)
+{
+	HASH_SEQ_STATUS status;
+	SMgrRelation reln;
+
+	/* Nothing to do if hashtable not set up */
+	if (SMgrRelationHash == NULL)
+		return;
+
+	hash_seq_init(&status, SMgrRelationHash);
+
+	while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
+		smgrrelease(reln);
+
+	releaseall_count++;
+}
+
 /*
  *	smgrcloseall() -- Close all existing SMgrRelation objects.
  */
@@ -696,6 +734,15 @@ AtEOXact_SMgr(void)
 	}
 }
 
+/*
+ * How many times has smgrreleaseall() run in this backend?
+ */
+uint64
+smgrreleaseallcount(void)
+{
+	return releaseall_count;
+}
+
 /*
  * This routine is called when we are ordered to release all open files by a
  * ProcSignalBarrier.
@@ -703,11 +750,6 @@ AtEOXact_SMgr(void)
 bool
 ProcessBarrierSmgrRelease(void)
 {
-	for (int i = 0; i < NSmgr; i++)
-	{
-		if (smgrsw[i].smgr_release)
-			smgrsw[i].smgr_release();
-	}
-
+	smgrreleaseall();
 	return true;
 }
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index b903d2bcaf..b2b8856a34 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -270,6 +270,9 @@ typedef struct WritebackContext
 	/* current number of pending writeback requests */
 	int			nr_pending;
 
+	/* smgrrelease() count for this backend, before anything is scheduled */
+	uint64		smgr_release_count;
+
 	/* pending requests */
 	PendingWriteback pending_writebacks[WRITEBACK_MAX_PENDING_FLUSHES];
 } WritebackContext;
diff --git a/src/include/storage/md.h b/src/include/storage/md.h
index 6e46d8d96a..ffffa40db7 100644
--- a/src/include/storage/md.h
+++ b/src/include/storage/md.h
@@ -23,7 +23,6 @@
 extern void mdinit(void);
 extern void mdopen(SMgrRelation reln);
 extern void mdclose(SMgrRelation reln, ForkNumber forknum);
-extern void mdrelease(void);
 extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
 extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 8e3ef92cda..4661971003 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -85,6 +85,9 @@ extern void smgrclearowner(SMgrRelation *owner, SMgrRelation reln);
 extern void smgrclose(SMgrRelation reln);
 extern void smgrcloseall(void);
 extern void smgrclosenode(RelFileNodeBackend rnode);
+extern void smgrrelease(SMgrRelation reln);
+extern void smgrreleaseall(void);
+extern uint64 smgrreleaseallcount(void);
 extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrdosyncall(SMgrRelation *rels, int nrels);
 extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
-- 
2.35.1

From 70d6d58ccc4d4dc1aaf39b8f11dcdf82d71c7dac Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Mon, 21 Feb 2022 16:42:16 -0800
Subject: [PATCH 2/2] WIP: fix old-fd issues using global barriers everywhere.

---
 src/backend/commands/tablespace.c | 10 +++++-----
 src/include/pg_config_manual.h    |  9 ---------
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 40514ab550..0c433360b6 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -1574,6 +1574,11 @@ tblspc_redo(XLogReaderState *record)
 	{
 		xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);
 
+#if defined(USE_BARRIER_SMGRRELEASE)
+		/* Close all smgr fds in all backends. */
+		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
+#endif
+
 		/*
 		 * If we issued a WAL record for a drop tablespace it implies that
 		 * there were no files in it at all when the DROP was done. That means
@@ -1591,11 +1596,6 @@ tblspc_redo(XLogReaderState *record)
 		 */
 		if (!destroy_tablespace_directories(xlrec->ts_id, true))
 		{
-#if defined(USE_BARRIER_SMGRRELEASE)
-			/* Close all smgr fds in all backends. */
-			WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
-#endif
-
 			ResolveRecoveryConflictWithTablespace(xlrec->ts_id);
 
 			/*
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index 84ce5a4a5d..1eb3988414 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -152,16 +152,7 @@
 #define EXEC_BACKEND
 #endif
 
-/*
- * If USE_BARRIER_SMGRRELEASE is defined, certain code paths that unlink
- * directories will ask other backends to close all smgr file descriptors.
- * This is enabled on Windows, because otherwise unlinked but still open files
- * can prevent rmdir(containing_directory) from succeeding.  On other
- * platforms, it can be defined to exercise those code paths.
- */
-#if defined(WIN32)
 #define USE_BARRIER_SMGRRELEASE
-#endif
 
 /*
  * Define this if your operating system supports link()
-- 
2.35.1

Reply via email to