On Tue, 2025-03-11 at 11:26 -0400, Tom Lane wrote:
> Right, that was what I was thinking, but hadn't had time to look in
> detail. The postDataBound dependency isn't real helpful here, we
> could lose that if we had the data dependency.
Attached a patch.
It's a bit messier than I expected, so I'm open to other suggestions.
The reason is because materialized view data is also pushed to
RESTORE_PASS_POST_ACL, so we need to do the same for the statistics
(otherwise the dependency is just ignored).
Regards,
Jeff Davis
From 4e84889cb890e5e855559191e6ca8d152e2495e0 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Tue, 11 Mar 2025 13:51:25 -0700
Subject: [PATCH v1] Make MV statistics depend on MV data.
REFRESH MATERIALIZED VIEW replaces the storage, which resets
statistics, so statistics must be restored afterward.
If statistics are for a materialized view, and if the materialized
view data is being dumped, create a dependency from the former
referring to the latter. Defer the statistics to SECTION_POST_DATA,
which is where the materialized view data goes.
In that case, it also requires deferring the statistics to
RESTORE_PASS_POST_ACL.
Reported-by: Ashutosh Bapat <[email protected]>
Discussion: https://postgr.es/m/caexhw5s47kmubpbbrjzsm-zfe0tj2o3gbagb7yaye8rq-v2...@mail.gmail.com
---
src/bin/pg_dump/pg_backup_archiver.c | 46 ++++++++----
src/bin/pg_dump/pg_dump.c | 104 +++++++++++++++------------
src/bin/pg_dump/pg_dump.h | 3 +-
src/bin/pg_dump/pg_dump_sort.c | 2 +-
4 files changed, 95 insertions(+), 60 deletions(-)
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 7480e122b61..ace2f203497 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -72,7 +72,7 @@ static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te);
static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te);
static void processSearchPathEntry(ArchiveHandle *AH, TocEntry *te);
static int _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH);
-static RestorePass _tocEntryRestorePass(TocEntry *te);
+static RestorePass _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te);
static bool _tocEntryIsACL(TocEntry *te);
static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
@@ -102,7 +102,8 @@ static void pending_list_append(TocEntry *l, TocEntry *te);
static void pending_list_remove(TocEntry *te);
static int TocEntrySizeCompareQsort(const void *p1, const void *p2);
static int TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg);
-static void move_to_ready_heap(TocEntry *pending_list,
+static void move_to_ready_heap(ArchiveHandle *AH,
+ TocEntry *pending_list,
binaryheap *ready_heap,
RestorePass pass);
static TocEntry *pop_next_work_item(binaryheap *ready_heap,
@@ -747,7 +748,7 @@ RestoreArchive(Archive *AHX)
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0)
continue; /* ignore if not to be dumped at all */
- switch (_tocEntryRestorePass(te))
+ switch (_tocEntryRestorePass(AH, te))
{
case RESTORE_PASS_MAIN:
(void) restore_toc_entry(AH, te, false);
@@ -766,7 +767,7 @@ RestoreArchive(Archive *AHX)
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 &&
- _tocEntryRestorePass(te) == RESTORE_PASS_ACL)
+ _tocEntryRestorePass(AH, te) == RESTORE_PASS_ACL)
(void) restore_toc_entry(AH, te, false);
}
}
@@ -776,7 +777,7 @@ RestoreArchive(Archive *AHX)
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 &&
- _tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL)
+ _tocEntryRestorePass(AH, te) == RESTORE_PASS_POST_ACL)
(void) restore_toc_entry(AH, te, false);
}
}
@@ -3212,7 +3213,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
* See notes with the RestorePass typedef in pg_backup_archiver.h.
*/
static RestorePass
-_tocEntryRestorePass(TocEntry *te)
+_tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
{
/* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */
if (strcmp(te->desc, "ACL") == 0 ||
@@ -3233,6 +3234,26 @@ _tocEntryRestorePass(TocEntry *te)
strncmp(te->tag, "EVENT TRIGGER ", 14) == 0)
return RESTORE_PASS_POST_ACL;
+ /*
+ * If statistics data is dependent on materialized view data, it must be
+ * deferred to RESTORE_PASS_POST_ACL.
+ */
+ if (strcmp(te->desc, "STATISTICS DATA") == 0)
+ {
+ for (int i = 0; i < te->nDeps; i++)
+ {
+ DumpId depid = te->dependencies[i];
+
+ if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
+ {
+ TocEntry *otherte = AH->tocsByDumpId[depid];
+
+ if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0)
+ return RESTORE_PASS_POST_ACL;
+ }
+ }
+ }
+
/* All else can be handled in the main pass. */
return RESTORE_PASS_MAIN;
}
@@ -4242,7 +4263,7 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list)
* not set skipped_some in this case, since by assumption no main-pass
* items could depend on these.
*/
- if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN)
+ if (_tocEntryRestorePass(AH, next_work_item) != RESTORE_PASS_MAIN)
do_now = false;
if (do_now)
@@ -4324,7 +4345,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
* process in the current restore pass.
*/
AH->restorePass = RESTORE_PASS_MAIN;
- move_to_ready_heap(pending_list, ready_heap, AH->restorePass);
+ move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass);
/*
* main parent loop
@@ -4373,7 +4394,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
/* Advance to next restore pass */
AH->restorePass++;
/* That probably allows some stuff to be made ready */
- move_to_ready_heap(pending_list, ready_heap, AH->restorePass);
+ move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass);
/* Loop around to see if anything's now ready */
continue;
}
@@ -4544,7 +4565,8 @@ TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg)
* which applies the same logic one-at-a-time.)
*/
static void
-move_to_ready_heap(TocEntry *pending_list,
+move_to_ready_heap(ArchiveHandle *AH,
+ TocEntry *pending_list,
binaryheap *ready_heap,
RestorePass pass)
{
@@ -4557,7 +4579,7 @@ move_to_ready_heap(TocEntry *pending_list,
next_te = te->pending_next;
if (te->depCount == 0 &&
- _tocEntryRestorePass(te) == pass)
+ _tocEntryRestorePass(AH, te) == pass)
{
/* Remove it from pending_list ... */
pending_list_remove(te);
@@ -4951,7 +4973,7 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te,
* memberships changed.
*/
if (otherte->depCount == 0 &&
- _tocEntryRestorePass(otherte) == AH->restorePass &&
+ _tocEntryRestorePass(AH, otherte) == AH->restorePass &&
otherte->pending_prev != NULL &&
ready_heap != NULL)
{
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index c371570501a..7b442d8391d 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -2964,6 +2964,21 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo)
tbinfo->dataObj = tdinfo;
+ /*
+ * If statistics and data are being dumped for a materialized view, the
+ * statistics depend on the materialized view data. That's because REFRESH
+ * MATERIALIZED VIEW will completely replace the storage and reset the
+ * stats.
+ *
+ * The dependency is added here because the statistics objects are created
+ * first.
+ */
+ if (tbinfo->relkind == RELKIND_MATVIEW && tbinfo->stats != NULL)
+ {
+ tbinfo->stats->section = SECTION_POST_DATA;
+ addObjectDependency(&tbinfo->stats->dobj, tdinfo->dobj.dumpId);
+ }
+
/* Make sure that we'll collect per-column info for this table. */
tbinfo->interesting = true;
}
@@ -6850,7 +6865,34 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages,
info->relkind = relkind;
info->indAttNames = indAttNames;
info->nindAttNames = nindAttNames;
- info->postponed_def = false;
+
+ /*
+ * Ordinarily, stats go in SECTION_DATA for tables and
+ * SECTION_POST_DATA for indexes.
+ *
+ * However, the section may be updated later for materialized views.
+ * Matview stats depend on the matview data, because REFRESH
+ * MATERIALIZED VIEW replaces the storage and resets the stats, and
+ * the matview data is in SECTION_POST_DATA. Also, the materialized
+ * view definition may be postponed from SECTION_PRE_DATA to
+ * SECTION_POST_DATA to resolve some kinds of dependency problems (see
+ * repairMatViewBoundaryMultiLoop()).
+ */
+ switch (info->relkind)
+ {
+ case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
+ case RELKIND_MATVIEW:
+ info->section = SECTION_DATA;
+ break;
+ case RELKIND_INDEX:
+ case RELKIND_PARTITIONED_INDEX:
+ info->section = SECTION_POST_DATA;
+ break;
+ default:
+ pg_fatal("cannot dump statistics for relation kind '%c'",
+ info->relkind);
+ }
return info;
}
@@ -7249,9 +7291,17 @@ getTables(Archive *fout, int *numTables)
/* Add statistics */
if (tblinfo[i].interesting)
- getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relpages,
- PQgetvalue(res, i, i_reltuples),
- relallvisible, tblinfo[i].relkind, NULL, 0);
+ {
+ RelStatsInfo *stats;
+
+ stats = getRelationStatistics(fout, &tblinfo[i].dobj,
+ tblinfo[i].relpages,
+ PQgetvalue(res, i, i_reltuples),
+ relallvisible,
+ tblinfo[i].relkind, NULL, 0);
+ if (tblinfo[i].relkind == RELKIND_MATVIEW)
+ tblinfo[i].stats = stats;
+ }
/*
* Read-lock target tables to make sure they aren't DROPPED or altered
@@ -10448,34 +10498,6 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
appendPQExpBuffer(out, "::%s", argtype);
}
-/*
- * Decide which section to use based on the relkind of the parent object.
- *
- * NB: materialized views may be postponed from SECTION_PRE_DATA to
- * SECTION_POST_DATA to resolve some kinds of dependency problems. If so, the
- * matview stats will also be postponed to SECTION_POST_DATA. See
- * repairMatViewBoundaryMultiLoop().
- */
-static teSection
-statisticsDumpSection(const RelStatsInfo *rsinfo)
-{
- switch (rsinfo->relkind)
- {
- case RELKIND_RELATION:
- case RELKIND_PARTITIONED_TABLE:
- case RELKIND_MATVIEW:
- return SECTION_DATA;
- case RELKIND_INDEX:
- case RELKIND_PARTITIONED_INDEX:
- return SECTION_POST_DATA;
- default:
- pg_fatal("cannot dump statistics for relation kind '%c'",
- rsinfo->relkind);
- }
-
- return 0; /* keep compiler quiet */
-}
-
/*
* dumpRelationStats --
*
@@ -10488,8 +10510,6 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
PGresult *res;
PQExpBuffer query;
PQExpBuffer out;
- DumpId *deps = NULL;
- int ndeps = 0;
char *qualified_name;
int i_attname;
int i_inherited;
@@ -10511,13 +10531,6 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
if (!fout->dopt->dumpStatistics)
return;
- /* dependent on the relation definition, if doing schema */
- if (fout->dopt->dumpSchema)
- {
- deps = dobj->dependencies;
- ndeps = dobj->nDeps;
- }
-
query = createPQExpBuffer();
if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
{
@@ -10690,11 +10703,10 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
ARCHIVE_OPTS(.tag = dobj->name,
.namespace = dobj->namespace->dobj.name,
.description = "STATISTICS DATA",
- .section = rsinfo->postponed_def ?
- SECTION_POST_DATA : statisticsDumpSection(rsinfo),
+ .section = rsinfo->section,
.createStmt = out->data,
- .deps = deps,
- .nDeps = ndeps));
+ .deps = dobj->dependencies,
+ .nDeps = dobj->nDeps));
free(qualified_name);
destroyPQExpBuffer(out);
@@ -19383,7 +19395,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
break;
case DO_REL_STATS:
/* stats section varies by parent object type, DATA or POST */
- if (statisticsDumpSection((RelStatsInfo *) dobj) == SECTION_DATA)
+ if (((RelStatsInfo *) dobj)->section == SECTION_DATA)
{
addObjectDependency(dobj, preDataBound->dumpId);
addObjectDependency(postDataBound, dobj->dumpId);
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index bbdb30b5f54..70f7a369e4a 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -369,6 +369,7 @@ typedef struct _tableInfo
bool *notnull_islocal; /* true if NOT NULL has local definition */
struct _attrDefInfo **attrdefs; /* DEFAULT expressions */
struct _constraintInfo *checkexprs; /* CHECK constraints */
+ struct _relStatsInfo *stats; /* only set for matviews */
bool needs_override; /* has GENERATED ALWAYS AS IDENTITY */
char *amname; /* relation access method */
@@ -449,7 +450,7 @@ typedef struct _relStatsInfo
*/
char **indAttNames; /* attnames of the index, in order */
int32 nindAttNames; /* number of attnames stored (can be 0) */
- bool postponed_def; /* stats must be postponed into post-data */
+ teSection section; /* stats may appear in data or post-data */
} RelStatsInfo;
typedef struct _statsExtInfo
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index f75e9928bad..0b0977788f1 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -820,7 +820,7 @@ repairMatViewBoundaryMultiLoop(DumpableObject *boundaryobj,
RelStatsInfo *nextinfo = (RelStatsInfo *) nextobj;
if (nextinfo->relkind == RELKIND_MATVIEW)
- nextinfo->postponed_def = true;
+ nextinfo->section = SECTION_POST_DATA;
}
}
--
2.34.1