On Wed, Mar 12, 2025 at 08:41:29PM -0400, Tom Lane wrote:
> So this may have been going on for quite some time without our
> noticing.  The "corrupted statistics file" whine is most likely
> caused by pg_upgrade copying the old system's pgstat.stat file
> into the new installation --- is that a good idea?  I have
> no idea what's causing the redo LSN complaint, but it seems
> like that might deserve closer investigation.

Playing catch-up with various things, my apologies for the lag.

We do not copy the stats file from the old to the new node AFAIK.
This would not work anyway as the old file would fail to load on the
new node when starting the new server due to PGSTAT_FILE_FORMAT_ID
that does not match.  Note that the LSN check is happening after the
version check.

The complaint is coming from the firect control file manipulations
that pg_upgrade does, which are incompatible with what the stats file
stores, like this one with pg_resetwal that forces the new cluster to
use a redo LSN newer than anything that has been copied from the old
cluster so as the page LSNs can remain consistent: 
    exec_prog(UTILITY_LOG_FILE, NULL, true, true,
    /* use timeline 1 to match controldata and no WAL history file */
              "\"%s/pg_resetwal\" -l 00000001%s \"%s\"", new_cluster.bindir,
              old_cluster.controldata.nextxlogfile + 8,
              new_cluster.pgdata);

b860848232aa exists because I've been trying to make the handling of
the stats file more durable by forcing it to be flushed at each
checkpoint, where I've found this check to be independently useful.

Let's remove it for this release.  Perhaps we will not even need this 
part if we are able to rebuild the most critical stats from WAL after
a crash.  This itself needs more work, one point mentioned being to
move some table stats at the level of its relfilenode(s) so as we
could catch up on the data in the startup process when recovering.
Note the bump of PGSTAT_FILE_FORMAT_ID that's required by removing
this LSN.

With all that said, please see the attached that I'm planning to do.
--
Michael
From dd72c59d7a7295262d080b063678f82e548bb50f Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Sat, 15 Mar 2025 10:55:41 +0900
Subject: [PATCH] Revert "Add redo LSN to pgstats files"

This reverts commit b860848232aab440c9ac4c5de3563565e3d2934b.
---
 src/include/pgstat.h                |  5 ++--
 src/backend/access/transam/xlog.c   |  2 +-
 src/backend/utils/activity/pgstat.c | 41 +++++++----------------------
 3 files changed, 12 insertions(+), 36 deletions(-)

diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index def6b370ac11..b01875d5b165 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -11,7 +11,6 @@
 #ifndef PGSTAT_H
 #define PGSTAT_H
 
-#include "access/xlogdefs.h"
 #include "datatype/timestamp.h"
 #include "portability/instr_time.h"
 #include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */
@@ -212,7 +211,7 @@ typedef struct PgStat_TableXactStatus
  * ------------------------------------------------------------
  */
 
-#define PGSTAT_FILE_FORMAT_ID  0x01A5BCB5
+#define PGSTAT_FILE_FORMAT_ID  0x01A5BCB6
 
 typedef struct PgStat_ArchiverStats
 {
@@ -514,7 +513,7 @@ extern Size StatsShmemSize(void);
 extern void StatsShmemInit(void);
 
 /* Functions called during server startup / shutdown */
-extern void pgstat_restore_stats(XLogRecPtr redo);
+extern void pgstat_restore_stats(void);
 extern void pgstat_discard_stats(void);
 extern void pgstat_before_server_shutdown(int code, Datum arg);
 
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index 799fc739e18c..4b6c694a3f71 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5728,7 +5728,7 @@ StartupXLOG(void)
        if (didCrash)
                pgstat_discard_stats();
        else
-               pgstat_restore_stats(checkPoint.redo);
+               pgstat_restore_stats();
 
        lastFullPageWrites = checkPoint.fullPageWrites;
 
diff --git a/src/backend/utils/activity/pgstat.c 
b/src/backend/utils/activity/pgstat.c
index 3168b825e25f..40063085073f 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -104,7 +104,6 @@
 #include <unistd.h>
 
 #include "access/xact.h"
-#include "access/xlog.h"
 #include "lib/dshash.h"
 #include "pgstat.h"
 #include "storage/fd.h"
@@ -181,8 +180,8 @@ typedef struct PgStat_SnapshotEntry
  * ----------
  */
 
-static void pgstat_write_statsfile(XLogRecPtr redo);
-static void pgstat_read_statsfile(XLogRecPtr redo);
+static void pgstat_write_statsfile(void);
+static void pgstat_read_statsfile(void);
 
 static void pgstat_init_snapshot_fixed(void);
 
@@ -503,9 +502,9 @@ static const PgStat_KindInfo **pgstat_kind_custom_infos = 
NULL;
  * Should only be called by the startup process or in single user mode.
  */
 void
-pgstat_restore_stats(XLogRecPtr redo)
+pgstat_restore_stats(void)
 {
-       pgstat_read_statsfile(redo);
+       pgstat_read_statsfile();
 }
 
 /*
@@ -581,7 +580,7 @@ pgstat_before_server_shutdown(int code, Datum arg)
        if (code == 0)
        {
                pgStatLocal.shmem->is_shutdown = true;
-               pgstat_write_statsfile(GetRedoRecPtr());
+               pgstat_write_statsfile();
        }
 }
 
@@ -1585,7 +1584,7 @@ write_chunk(FILE *fpout, void *ptr, size_t len)
  * stats so locking is not required.
  */
 static void
-pgstat_write_statsfile(XLogRecPtr redo)
+pgstat_write_statsfile(void)
 {
        FILE       *fpout;
        int32           format_id;
@@ -1602,8 +1601,7 @@ pgstat_write_statsfile(XLogRecPtr redo)
        /* we're shutting down, so it's ok to just override this */
        pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
 
-       elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
-                LSN_FORMAT_ARGS(redo));
+       elog(DEBUG2, "writing stats file \"%s\"", statfile);
 
        /*
         * Open the statistics temp file to write out the current values.
@@ -1624,9 +1622,6 @@ pgstat_write_statsfile(XLogRecPtr redo)
        format_id = PGSTAT_FILE_FORMAT_ID;
        write_chunk_s(fpout, &format_id);
 
-       /* Write the redo LSN, used to cross check the file read */
-       write_chunk_s(fpout, &redo);
-
        /* Write various stats structs for fixed number of objects */
        for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; 
kind++)
        {
@@ -1773,20 +1768,18 @@ read_chunk(FILE *fpin, void *ptr, size_t len)
  * stats so locking is not required.
  */
 static void
-pgstat_read_statsfile(XLogRecPtr redo)
+pgstat_read_statsfile(void)
 {
        FILE       *fpin;
        int32           format_id;
        bool            found;
        const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
        PgStat_ShmemControl *shmem = pgStatLocal.shmem;
-       XLogRecPtr      file_redo;
 
        /* shouldn't be called from postmaster */
        Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
 
-       elog(DEBUG2, "reading stats file \"%s\" with redo %X/%X", statfile,
-                LSN_FORMAT_ARGS(redo));
+       elog(DEBUG2, "reading stats file \"%s\"", statfile);
 
        /*
         * Try to open the stats file. If it doesn't exist, the backends simply
@@ -1824,22 +1817,6 @@ pgstat_read_statsfile(XLogRecPtr redo)
                goto error;
        }
 
-       /*
-        * Read the redo LSN stored in the file.
-        */
-       if (!read_chunk_s(fpin, &file_redo))
-       {
-               elog(WARNING, "could not read redo LSN");
-               goto error;
-       }
-
-       if (file_redo != redo)
-       {
-               elog(WARNING, "found incorrect redo LSN %X/%X (expected %X/%X)",
-                        LSN_FORMAT_ARGS(file_redo), LSN_FORMAT_ARGS(redo));
-               goto error;
-       }
-
        /*
         * We found an existing statistics file. Read it and put all the stats
         * data into place.
-- 
2.46.0

Attachment: signature.asc
Description: PGP signature

Reply via email to