Hi, I created a .patch that will allow me to recover the stat files after a potential crash. Depending on the traffic on the server some records might be lost (0.5 sec of records / more or less ? ). >From what I read it is still better than no stat files at all.
I restricted it to the default recovery scenario only (RECOVERY_TARGET_TIMELINE_LATEST) to avoid having invalid stats files with other recovery options. Am I missing something ? File integrity should be fine because of renaming.
--- a/src/include/pgstat.h 2022-02-22 22:22:22.222222222 +0200 +++ b/src/include/pgstat.h 2022-02-22 22:22:22.222222222 +0200 @@ -29,6 +29,7 @@ #define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat" #define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat" #define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp" +#define PGSTAT_STAT_RECOVERY_FILENAME "pg_stat/recovery" /* Default directory to store temporary statistics data in */ #define PG_STAT_TMP_DIR "pg_stat_tmp" @@ -1091,6 +1092,7 @@ extern void pgstat_init(void); extern int pgstat_start(void); extern void pgstat_reset_all(void); +extern void pgstat_create_recovery_file(void); extern void allow_immediate_pgstat_restart(void); #ifdef EXEC_BACKEND --- a/src/backend/access/transam/xlog.c 2022-02-22 22:22:22.222222222 +0200 +++ b/src/backend/access/transam/xlog.c 2022-02-22 22:22:22.222222222 +0200 @@ -5195,7 +5195,16 @@ /* * Reset pgstat data, because it may be invalid after recovery. */ - pgstat_reset_all(); + if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST) + { + elog(WARNING, "Doing recovery"); + pgstat_create_recovery_file(); + } + else + { + elog(WARNING, "Reseting recovery files"); + pgstat_reset_all(); + } /* * If there was a backup label file, it's done its job and the info --- a/src/backend/postmaster/pgstat.c 2022-02-22 22:22:22.222222222 +0200 +++ b/src/backend/postmaster/pgstat.c 2022-02-22 22:22:22.222222222 +0200 @@ -739,6 +739,54 @@ pgstat_reset_remove_files(PGSTAT_STAT_PERMANENT_DIRECTORY); } +static bool +pgstat_check_recovery_file_exists() +{ + const char *stat_rec_file = PGSTAT_STAT_RECOVERY_FILENAME; + return (unlink(stat_rec_file) == 0); +} + +void +pgstat_create_recovery_file(void) +{ + FILE *fpout; + const char *stat_rec_file = PGSTAT_STAT_RECOVERY_FILENAME; + + elog(WARNING, "writing stats recovery file \"%s\"", stat_rec_file); + + /* + * Open the statistics recovery file to touch it. + */ + fpout = AllocateFile(stat_rec_file, PG_BINARY_W); + if (fpout == NULL) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open stats recovery file \"%s\": %m", + stat_rec_file))); + return; + } + + if (ferror(fpout)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write stats recovery file \"%s\": %m", + stat_rec_file))); + FreeFile(fpout); + unlink(stat_rec_file); + } + else if (FreeFile(fpout) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not close stats recovery file \"%s\": %m", + stat_rec_file))); + unlink(stat_rec_file); + } + +} + #ifdef EXEC_BACKEND /* @@ -3525,7 +3573,8 @@ * Read in existing stats files or initialize the stats to zero. */ pgStatRunningInCollector = true; - pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true); + bool recFile = pgstat_check_recovery_file_exists(); + pgStatDBHash = pgstat_read_statsfiles(InvalidOid, recFile ? false : true, true); /* Prepare to wait for our latch or data in our socket. */ wes = CreateWaitEventSet(CurrentMemoryContext, 3);