On Fri, Jul 05, 2024 at 01:52:31PM +0900, Michael Paquier wrote:
> On Sat, Jun 29, 2024 at 11:13:04PM +0200, Tomas Vondra wrote:
>> I think those are two independent issues - knowing that the snapshot is
>> from the last checkpoint, and knowing that it's correct (not corrupted).
>> And yeah, we should be careful about fsync/durable_rename.
> 
> Yeah, that's bugging me as well.  I don't really get why we would not
> want durability at shutdown for this data.  So how about switching the
> end of pgstat_write_statsfile() to use durable_rename()?  Sounds like
> an independent change, worth on its own.

Please find attached a rebased patch set with the durability point
addressed in 0001.  There were also some conflicts.

Note that I have applied the previous 0002 adding an assert in
pgstat_write_statsfile() as 734c057a8935, as I've managed to break
again this assumption while hacking more on this area..
--
Michael
From b03087f1746233bce0d1fbbb69789795296e779b Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Fri, 12 Jul 2024 15:31:27 +0900
Subject: [PATCH v2 1/4] Make write of pgstats file durable

This switches the pgstats code to use durable_rename() rather than
rename(), to make sure that the stats file is not lost should the host
by plugged off after PostgreSQL has been stopped.
---
 src/backend/utils/activity/pgstat.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index ed7baa6e04..66cda798fb 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -1478,12 +1478,9 @@ pgstat_write_statsfile(void)
 						tmpfile)));
 		unlink(tmpfile);
 	}
-	else if (rename(tmpfile, statfile) < 0)
+	else if (durable_rename(tmpfile, statfile, LOG) < 0)
 	{
-		ereport(LOG,
-				(errcode_for_file_access(),
-				 errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
-						tmpfile, statfile)));
+		/* error logged already */
 		unlink(tmpfile);
 	}
 }
-- 
2.45.2

From 4340ebd842049b5c706ada324bba36a5c5a78bb9 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Fri, 12 Jul 2024 15:22:12 +0900
Subject: [PATCH v2 2/4] Add redo LSN to pgstats file

This is used in the startup process to check that the file we are
loading is the one referring to the latest checkpoint.

Bump PGSTAT_FILE_FORMAT_ID.
---
 src/include/pgstat.h                |  5 +++--
 src/backend/access/transam/xlog.c   |  2 +-
 src/backend/utils/activity/pgstat.c | 26 +++++++++++++++++++-------
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 6b99bb8aad..043d39a565 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -11,6 +11,7 @@
 #ifndef PGSTAT_H
 #define PGSTAT_H
 
+#include "access/xlogdefs.h"
 #include "datatype/timestamp.h"
 #include "portability/instr_time.h"
 #include "postmaster/pgarch.h"	/* for MAX_XFN_CHARS */
@@ -235,7 +236,7 @@ typedef struct PgStat_TableXactStatus
  * ------------------------------------------------------------
  */
 
-#define PGSTAT_FILE_FORMAT_ID	0x01A5BCAD
+#define PGSTAT_FILE_FORMAT_ID	0x01A5BCAE
 
 typedef struct PgStat_ArchiverStats
 {
@@ -466,7 +467,7 @@ extern Size StatsShmemSize(void);
 extern void StatsShmemInit(void);
 
 /* Functions called during server startup / shutdown */
-extern void pgstat_restore_stats(void);
+extern void pgstat_restore_stats(XLogRecPtr redo);
 extern void pgstat_discard_stats(void);
 extern void pgstat_before_server_shutdown(int code, Datum arg);
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 33e27a6e72..f137551e8d 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5641,7 +5641,7 @@ StartupXLOG(void)
 	if (didCrash)
 		pgstat_discard_stats();
 	else
-		pgstat_restore_stats();
+		pgstat_restore_stats(checkPoint.redo);
 
 	lastFullPageWrites = checkPoint.fullPageWrites;
 
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 66cda798fb..2952604a51 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -94,6 +94,7 @@
 #include <unistd.h>
 
 #include "access/xact.h"
+#include "access/xlog.h"
 #include "lib/dshash.h"
 #include "pgstat.h"
 #include "port/atomics.h"
@@ -171,8 +172,8 @@ typedef struct PgStat_SnapshotEntry
  * ----------
  */
 
-static void pgstat_write_statsfile(void);
-static void pgstat_read_statsfile(void);
+static void pgstat_write_statsfile(XLogRecPtr redo);
+static void pgstat_read_statsfile(XLogRecPtr redo);
 
 static void pgstat_reset_after_failure(void);
 
@@ -448,9 +449,9 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = {
  * Should only be called by the startup process or in single user mode.
  */
 void
-pgstat_restore_stats(void)
+pgstat_restore_stats(XLogRecPtr redo)
 {
-	pgstat_read_statsfile();
+	pgstat_read_statsfile(redo);
 }
 
 /*
@@ -526,7 +527,7 @@ pgstat_before_server_shutdown(int code, Datum arg)
 	if (code == 0)
 	{
 		pgStatLocal.shmem->is_shutdown = true;
-		pgstat_write_statsfile();
+		pgstat_write_statsfile(GetRedoRecPtr());
 	}
 }
 
@@ -1349,7 +1350,7 @@ write_chunk(FILE *fpout, void *ptr, size_t len)
  * stats so locking is not required.
  */
 static void
-pgstat_write_statsfile(void)
+pgstat_write_statsfile(XLogRecPtr redo)
 {
 	FILE	   *fpout;
 	int32		format_id;
@@ -1387,6 +1388,9 @@ pgstat_write_statsfile(void)
 	format_id = PGSTAT_FILE_FORMAT_ID;
 	write_chunk_s(fpout, &format_id);
 
+	/* Write the redo LSN, used to cross check the file loaded */
+	write_chunk_s(fpout, &redo);
+
 	/* Write various stats structs for fixed number of objects */
 	for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
 	{
@@ -1501,13 +1505,14 @@ read_chunk(FILE *fpin, void *ptr, size_t len)
  * stats so locking is not required.
  */
 static void
-pgstat_read_statsfile(void)
+pgstat_read_statsfile(XLogRecPtr redo)
 {
 	FILE	   *fpin;
 	int32		format_id;
 	bool		found;
 	const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
 	PgStat_ShmemControl *shmem = pgStatLocal.shmem;
+	XLogRecPtr	file_redo;
 
 	/* shouldn't be called from postmaster */
 	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
@@ -1541,6 +1546,13 @@ pgstat_read_statsfile(void)
 		format_id != PGSTAT_FILE_FORMAT_ID)
 		goto error;
 
+	/*
+	 * Read the redo LSN stored in the file.
+	 */
+	if (!read_chunk_s(fpin, &file_redo) ||
+		file_redo != redo)
+		goto error;
+
 	/*
 	 * We found an existing statistics file. Read it and put all the stats
 	 * data into place.
-- 
2.45.2

From 54e5253ee94229ea7c0c39960c9f10d305c1ef21 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Tue, 18 Jun 2024 11:10:19 +0900
Subject: [PATCH v2 3/4] Add some DEBUG2 information about the redo LSN of the
 stats file

This is useful for.. Debugging.  How surprising.
---
 src/backend/utils/activity/pgstat.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 2952604a51..148dbf2c4a 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -1367,7 +1367,8 @@ pgstat_write_statsfile(XLogRecPtr redo)
 	/* we're shutting down, so it's ok to just override this */
 	pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
 
-	elog(DEBUG2, "writing stats file \"%s\"", statfile);
+	elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
+		 LSN_FORMAT_ARGS(redo));
 
 	/*
 	 * Open the statistics temp file to write out the current values.
@@ -1517,7 +1518,8 @@ pgstat_read_statsfile(XLogRecPtr redo)
 	/* shouldn't be called from postmaster */
 	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
 
-	elog(DEBUG2, "reading stats file \"%s\"", statfile);
+	elog(DEBUG2, "reading stats file \"%s\" with redo %X/%X", statfile,
+		LSN_FORMAT_ARGS(redo));
 
 	/*
 	 * Try to open the stats file. If it doesn't exist, the backends simply
-- 
2.45.2

From beda72b0ee48dd62ed60205a9cb4ca1496b45da5 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Tue, 18 Jun 2024 14:49:33 +0900
Subject: [PATCH v2 4/4] Flush pgstats file during checkpoints

The flushes are done for non-shutdown checkpoints and restart points, so
as it is possible to keep some statistics around in the event of a
crash.  Keeping the before_shmem_exit() callback to flush the pgstats
file in a shutdown sequence is a bit easier than doing so at checkpoint,
as they may be stats to flush before we are completely done with the
shutdown checkpoint.  Keeping the callback also keeps the shutdown of
single-user mode simpler, where the stats also need to be flushed.

At the beginning of recovery, the stats file is loaded when the redo LSN
it stores matches with the point we are replaying from.
---
 src/include/pgstat.h                     |  4 +-
 src/backend/access/transam/xlog.c        | 27 ++++++++---
 src/backend/utils/activity/pgstat.c      | 62 ++++++------------------
 src/test/recovery/t/029_stats_restart.pl | 22 +++++++--
 4 files changed, 54 insertions(+), 61 deletions(-)

diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 043d39a565..f780b56cc3 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -466,9 +466,9 @@ typedef struct PgStat_PendingWalStats
 extern Size StatsShmemSize(void);
 extern void StatsShmemInit(void);
 
-/* Functions called during server startup / shutdown */
+/* Functions called during server startup / checkpoint / shutdown */
 extern void pgstat_restore_stats(XLogRecPtr redo);
-extern void pgstat_discard_stats(void);
+extern void pgstat_flush_stats(XLogRecPtr redo);
 extern void pgstat_before_server_shutdown(int code, Datum arg);
 
 /* Functions for backend initialization */
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f137551e8d..9727b0f20c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5390,7 +5390,6 @@ StartupXLOG(void)
 	XLogCtlInsert *Insert;
 	CheckPoint	checkPoint;
 	bool		wasShutdown;
-	bool		didCrash;
 	bool		haveTblspcMap;
 	bool		haveBackupLabel;
 	XLogRecPtr	EndOfLog;
@@ -5510,10 +5509,7 @@ StartupXLOG(void)
 	{
 		RemoveTempXlogFiles();
 		SyncDataDirectory();
-		didCrash = true;
 	}
-	else
-		didCrash = false;
 
 	/*
 	 * Prepare for WAL recovery if needed.
@@ -5638,10 +5634,7 @@ StartupXLOG(void)
 	 * TODO: With a bit of extra work we could just start with a pgstat file
 	 * associated with the checkpoint redo location we're starting from.
 	 */
-	if (didCrash)
-		pgstat_discard_stats();
-	else
-		pgstat_restore_stats(checkPoint.redo);
+	pgstat_restore_stats(checkPoint.redo);
 
 	lastFullPageWrites = checkPoint.fullPageWrites;
 
@@ -7213,6 +7206,15 @@ CreateCheckPoint(int flags)
 	 */
 	END_CRIT_SECTION();
 
+	/*
+	 * The control file update is done, hence it is time to write some fresh
+	 * statistics.  Stats are flushed at shutdown by the checkpointer in a
+	 * dedicated before_shmem_exit callback, combined with sanity checks
+	 * related to the shutdown of pgstats.
+	 */
+	if (!shutdown)
+		pgstat_flush_stats(RedoRecPtr);
+
 	/*
 	 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
 	 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
@@ -7681,6 +7683,15 @@ CreateRestartPoint(int flags)
 	}
 	LWLockRelease(ControlFileLock);
 
+	/*
+	 * The control file update is done, hence it is time to write some fresh
+	 * statistics.  Stats are flushed at shutdown by the checkpointer in a
+	 * dedicated before_shmem_exit callback, combined with sanity checks
+	 * related to the shutdown of pgstats.
+	 */
+	if ((flags & CHECKPOINT_IS_SHUTDOWN) == 0)
+		pgstat_flush_stats(RedoRecPtr);
+
 	/*
 	 * Update the average distance between checkpoints/restartpoints if the
 	 * prior checkpoint exists.
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 148dbf2c4a..d5ca4936f1 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -10,9 +10,10 @@
  * statistics.
  *
  * Statistics are loaded from the filesystem during startup (by the startup
- * process), unless preceded by a crash, in which case all stats are
- * discarded. They are written out by the checkpointer process just before
- * shutting down, except when shutting down in immediate mode.
+ * process) if the stats file has a redo LSN that matches with the . They
+ * are written out by the checkpointer during checkpoints and restart points,
+ * as well as before shutting down, except when shutting down in immediate
+ * mode.
  *
  * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
  *
@@ -455,53 +456,19 @@ pgstat_restore_stats(XLogRecPtr redo)
 }
 
 /*
- * Remove the stats file.  This is currently used only if WAL recovery is
- * needed after a crash.
+ * Write stats in memory to disk.
  *
- * Should only be called by the startup process or in single user mode.
+ * This is called by the checkpointer or in single-user mode.
  */
 void
-pgstat_discard_stats(void)
+pgstat_flush_stats(XLogRecPtr redo)
 {
-	int			ret;
-
-	/* NB: this needs to be done even in single user mode */
-
-	ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
-	if (ret != 0)
-	{
-		if (errno == ENOENT)
-			elog(DEBUG2,
-				 "didn't need to unlink permanent stats file \"%s\" - didn't exist",
-				 PGSTAT_STAT_PERMANENT_FILENAME);
-		else
-			ereport(LOG,
-					(errcode_for_file_access(),
-					 errmsg("could not unlink permanent statistics file \"%s\": %m",
-							PGSTAT_STAT_PERMANENT_FILENAME)));
-	}
-	else
-	{
-		ereport(DEBUG2,
-				(errcode_for_file_access(),
-				 errmsg_internal("unlinked permanent statistics file \"%s\"",
-								 PGSTAT_STAT_PERMANENT_FILENAME)));
-	}
-
-	/*
-	 * Reset stats contents. This will set reset timestamps of fixed-numbered
-	 * stats to the current time (no variable stats exist).
-	 */
-	pgstat_reset_after_failure();
+	pgstat_write_statsfile(redo);
 }
 
 /*
  * pgstat_before_server_shutdown() needs to be called by exactly one process
- * during regular server shutdowns. Otherwise all stats will be lost.
- *
- * We currently only write out stats for proc_exit(0). We might want to change
- * that at some point... But right now pgstat_discard_stats() would be called
- * during the start after a disorderly shutdown, anyway.
+ * during regular server shutdowns.
  */
 void
 pgstat_before_server_shutdown(int code, Datum arg)
@@ -526,6 +493,9 @@ pgstat_before_server_shutdown(int code, Datum arg)
 	 */
 	if (code == 0)
 	{
+		/* we're shutting down, so it's ok to just override this */
+		pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
+
 		pgStatLocal.shmem->is_shutdown = true;
 		pgstat_write_statsfile(GetRedoRecPtr());
 	}
@@ -1346,8 +1316,8 @@ write_chunk(FILE *fpout, void *ptr, size_t len)
 #define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
 
 /*
- * This function is called in the last process that is accessing the shared
- * stats so locking is not required.
+ * This function is called in the checkpointer or in single-user mode,
+ * so locking is not required.
  */
 static void
 pgstat_write_statsfile(XLogRecPtr redo)
@@ -1364,9 +1334,6 @@ pgstat_write_statsfile(XLogRecPtr redo)
 	/* should be called only by the checkpointer or single user mode */
 	Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
 
-	/* we're shutting down, so it's ok to just override this */
-	pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
-
 	elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
 		 LSN_FORMAT_ARGS(redo));
 
@@ -1423,7 +1390,6 @@ pgstat_write_statsfile(XLogRecPtr redo)
 		CHECK_FOR_INTERRUPTS();
 
 		/* we may have some "dropped" entries not yet removed, skip them */
-		Assert(!ps->dropped);
 		if (ps->dropped)
 			continue;
 
diff --git a/src/test/recovery/t/029_stats_restart.pl b/src/test/recovery/t/029_stats_restart.pl
index 93a7209f69..9956a6ab82 100644
--- a/src/test/recovery/t/029_stats_restart.pl
+++ b/src/test/recovery/t/029_stats_restart.pl
@@ -59,7 +59,7 @@ ok(-f "$og_stats", "origin stats file must exist");
 copy($og_stats, $statsfile) or die "Copy failed: $!";
 
 
-## test discarding of stats file after crash etc
+## test retention of stats file after crash etc
 
 $node->start;
 
@@ -69,12 +69,28 @@ is(have_stats('function', $dboid, $funcoid),
 	't', "$sect: function stats do exist");
 is(have_stats('relation', $dboid, $tableoid),
 	't', "$sect: relation stats do exist");
+# Flush the stats to disk.
+$node->psql('postgres', 'checkpoint');
 
 $node->stop('immediate');
 
-ok(!-f "$og_stats", "no stats file should exist after immediate shutdown");
+ok(-f "$og_stats", "stats file should exist after immediate shutdown");
 
-# copy the old stats back to test we discard stats after crash restart
+# Start once, there should be stats restored from the previous checkpoint.
+$node->start;
+
+$sect = "crashrecovery";
+is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
+is(have_stats('function', $dboid, $funcoid),
+	't', "$sect: function stats do exist");
+is(have_stats('relation', $dboid, $tableoid),
+	't', "$sect: relation stats do exist");
+
+$node->stop('immediate');
+
+# Copy the old stats back, and test that we discard stats after crash restart
+# because these don't match with the redo LSN stored in the stats file
+# generated by the previous checkpoint.
 copy($statsfile, $og_stats) or die "Copy failed: $!";
 
 $node->start;
-- 
2.45.2

Attachment: signature.asc
Description: PGP signature

Reply via email to