So [1] on its own didn't fix this.  My next guess is that the attached
might help.

Hmm.  Following Michael's clue that this might involve log files and
pg_ctl, I noticed one thing: pg_ctl implements
wait_for_postmaster_stop() by waiting for kill(pid, 0) to fail, and
our kill emulation does CallNamedPipe().  If the server is in the
process of exiting and the kernel is cleaning up all the handles we
didn't close, is there any reason to expect the signal pipe to be
closed after the log file?

[1] 
https://www.postgresql.org/message-id/flat/20221025213055.GA8537%40telsasoft.com#9030de6c4c5e544d2b057b793a5b42af
From 9cbfd5c1fc1327443152d89ca7f5346bfeda3f52 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Mon, 7 Nov 2022 17:43:42 +1300
Subject: [PATCH] Refactor rmtree() to use get_dirent_type().

Switch to get_dirent_type() instead of lstat() while traversing a
directory graph, to see if that fixes intermittent ENOTEMPTY failures
while cleaning up after the pg_upgrade tests.

The idea is that the offending directory entries must be
STATUS_DELETE_PENDING.  With this change, rmtree() should not skip them,
and try to unlink again.  That should either wait a short time for them
to go away (because other processes close the handle) or log a message
to tell us the path of the problem file if not so we can dig further.

Discussion: https://postgre.es/m/20220919213217.ptqfdlcc5idk5xup%40awork3.anarazel.de
---
 src/common/rmtree.c | 94 ++++++++++++++++++---------------------------
 1 file changed, 37 insertions(+), 57 deletions(-)

diff --git a/src/common/rmtree.c b/src/common/rmtree.c
index 221d0e20a7..736e34892c 100644
--- a/src/common/rmtree.c
+++ b/src/common/rmtree.c
@@ -20,13 +20,16 @@
 #include <unistd.h>
 #include <sys/stat.h>
 
+#include "common/file_utils.h"
+
 #ifndef FRONTEND
 #define pg_log_warning(...) elog(WARNING, __VA_ARGS__)
+#define LOG_LEVEL WARNING
 #else
 #include "common/logging.h"
+#define LOG_LEVEL PG_LOG_WARNING
 #endif
 
-
 /*
  *	rmtree
  *
@@ -41,82 +44,59 @@
 bool
 rmtree(const char *path, bool rmtopdir)
 {
-	bool		result = true;
 	char		pathbuf[MAXPGPATH];
-	char	  **filenames;
-	char	  **filename;
-	struct stat statbuf;
-
-	/*
-	 * we copy all the names out of the directory before we start modifying
-	 * it.
-	 */
-	filenames = pgfnames(path);
+	DIR		   *dir;
+	struct dirent *de;
+	bool		result = true;
 
-	if (filenames == NULL)
+	dir = opendir(path);
+	if (dir == NULL)
+	{
+		pg_log_warning("could not open directory \"%s\": %m", path);
 		return false;
+	}
 
-	/* now we have the names we can start removing things */
-	for (filename = filenames; *filename; filename++)
+	while (errno = 0, (de = readdir(dir)))
 	{
-		snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename);
-
-		/*
-		 * It's ok if the file is not there anymore; we were just about to
-		 * delete it anyway.
-		 *
-		 * This is not an academic possibility. One scenario where this
-		 * happens is when bgwriter has a pending unlink request for a file in
-		 * a database that's being dropped. In dropdb(), we call
-		 * ForgetDatabaseSyncRequests() to flush out any such pending unlink
-		 * requests, but because that's asynchronous, it's not guaranteed that
-		 * the bgwriter receives the message in time.
-		 */
-		if (lstat(pathbuf, &statbuf) != 0)
-		{
-			if (errno != ENOENT)
-			{
-				pg_log_warning("could not stat file or directory \"%s\": %m",
-							   pathbuf);
-				result = false;
-			}
+		if (strcmp(de->d_name, ".") == 0 ||
+			strcmp(de->d_name, "..") == 0)
 			continue;
-		}
-
-		if (S_ISDIR(statbuf.st_mode))
-		{
-			/* call ourselves recursively for a directory */
-			if (!rmtree(pathbuf, true))
-			{
-				/* we already reported the error */
-				result = false;
-			}
-		}
-		else
+		snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
+		switch (get_dirent_type(pathbuf, de, false, LOG_LEVEL))
 		{
-			if (unlink(pathbuf) != 0)
-			{
-				if (errno != ENOENT)
+			case PGFILETYPE_ERROR:
+				/* already logged, press on */
+				break;
+			case PGFILETYPE_DIR:
+				if (!rmtree(pathbuf, true))
+					result = false;
+				break;
+			default:
+				if (unlink(pathbuf) != 0 && errno != ENOENT)
 				{
-					pg_log_warning("could not remove file or directory \"%s\": %m",
-								   pathbuf);
+					pg_log_warning("could not unlink file \"%s\": %m", pathbuf);
 					result = false;
 				}
-			}
+				break;
 		}
 	}
 
+	if (errno != 0)
+	{
+		pg_log_warning("could not read directory \"%s\": %m", path);
+		result = false;
+	}
+
+	closedir(dir);
+
 	if (rmtopdir)
 	{
 		if (rmdir(path) != 0)
 		{
-			pg_log_warning("could not remove file or directory \"%s\": %m",
-						   path);
+			pg_log_warning("could not remove directory \"%s\": %m", path);
 			result = false;
 		}
 	}
 
-	pgfnames_cleanup(filenames);
-
 	return result;
 }
-- 
2.30.2

Reply via email to