Hi,

I created a .patch that will allow me to recover the stat files after a
potential crash.
Depending on the traffic on the server, some records might be lost (roughly
0.5 seconds' worth of records, more or less?).
From what I read, it is still better than having no stat files at all.

I restricted it to the default recovery scenario only
(RECOVERY_TARGET_TIMELINE_LATEST) to avoid having invalid stats files with
other recovery options.

Am I missing something? File integrity should be fine because the stats files
are written under a temporary name and then renamed into place.
--- a/src/include/pgstat.h	2022-02-22 22:22:22.222222222 +0200
+++ b/src/include/pgstat.h	2022-02-22 22:22:22.222222222 +0200
@@ -29,6 +29,7 @@
 #define PGSTAT_STAT_PERMANENT_DIRECTORY		"pg_stat"
 #define PGSTAT_STAT_PERMANENT_FILENAME		"pg_stat/global.stat"
 #define PGSTAT_STAT_PERMANENT_TMPFILE		"pg_stat/global.tmp"
+#define PGSTAT_STAT_RECOVERY_FILENAME		"pg_stat/recovery"
 
 /* Default directory to store temporary statistics data in */
 #define PG_STAT_TMP_DIR		"pg_stat_tmp"
@@ -1091,6 +1092,7 @@
 extern void pgstat_init(void);
 extern int	pgstat_start(void);
 extern void pgstat_reset_all(void);
+extern void pgstat_create_recovery_file(void);
 extern void allow_immediate_pgstat_restart(void);
 
 #ifdef EXEC_BACKEND
--- a/src/backend/access/transam/xlog.c	2022-02-22 22:22:22.222222222 +0200
+++ b/src/backend/access/transam/xlog.c	2022-02-22 22:22:22.222222222 +0200
@@ -5195,7 +5195,16 @@
 		/*
 		 * Reset pgstat data, because it may be invalid after recovery.
 		 */
-		pgstat_reset_all();
+		if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
+		{
+			elog(WARNING, "Doing recovery");
+			pgstat_create_recovery_file();
+		}
+		else
+		{
+			elog(WARNING, "Reseting recovery files");
+			pgstat_reset_all();
+		}
 
 		/*
 		 * If there was a backup label file, it's done its job and the info
--- a/src/backend/postmaster/pgstat.c	2022-02-22 22:22:22.222222222 +0200
+++ b/src/backend/postmaster/pgstat.c	2022-02-22 22:22:22.222222222 +0200
@@ -739,6 +739,54 @@
 	pgstat_reset_remove_files(PGSTAT_STAT_PERMANENT_DIRECTORY);
 }
 
+/* Consume the stats recovery marker: unlink it, return true if that worked. */
+static bool
+pgstat_check_recovery_file_exists(void)
+{
+	return (unlink(PGSTAT_STAT_RECOVERY_FILENAME) == 0);
+}
+
+/*
+ * pgstat_create_recovery_file() -
+ *
+ *	Create the empty marker file PGSTAT_STAT_RECOVERY_FILENAME.  Nothing is
+ *	written into it.  Failures are reported at LOG level and are not fatal;
+ *	a marker that could not be closed cleanly is removed again.
+ */
+void
+pgstat_create_recovery_file(void)
+{
+	FILE	   *fpout;
+	const char *stat_rec_file = PGSTAT_STAT_RECOVERY_FILENAME;
+
+	/* Debug-level trace only; this is not a condition users must act on. */
+	elog(DEBUG2, "writing stats recovery file \"%s\"", stat_rec_file);
+
+	/* Opening the file for writing is enough to create ("touch") it. */
+	fpout = AllocateFile(stat_rec_file, PG_BINARY_W);
+	if (fpout == NULL)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not open stats recovery file \"%s\": %m",
+						stat_rec_file)));
+		return;
+	}
+
+	/*
+	 * No data was written, so the stream cannot carry a write error here;
+	 * the only remaining failure mode is the close itself.
+	 */
+	if (FreeFile(fpout) < 0)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not close stats recovery file \"%s\": %m",
+						stat_rec_file)));
+		unlink(stat_rec_file);
+	}
+}
+
 #ifdef EXEC_BACKEND
 
 /*
@@ -3525,7 +3573,8 @@
 	 * Read in existing stats files or initialize the stats to zero.
 	 */
 	pgStatRunningInCollector = true;
-	pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
+	bool recFile = pgstat_check_recovery_file_exists();
+	pgStatDBHash = pgstat_read_statsfiles(InvalidOid, recFile ? false : true, true);
 
 	/* Prepare to wait for our latch or data in our socket. */
 	wes = CreateWaitEventSet(CurrentMemoryContext, 3);

Reply via email to