From 06aa5e704719167c6afc5c7f7b2f57f2be460cab Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Wed, 25 Feb 2026 15:56:14 -0500
Subject: [PATCH v3 2/3] pg_dump: Use tableid in getAttributeStats

The existing query for fetching attribute stats is clumsy for two
reasons. One is that the volume of stats returned is unpredictable and
could be very large, so stats must be fetched in medium-sized batches.
The other is that the stats fetching query is on pg_stats, which
historically has not exposed tableid, requiring us to pass in an array
of schemanames and an array of tablenames and unnest them in pairs. This
results in a hash join which gives very poor performance, though adding
an extra qual was able to trick the planner into using an existing
index. That trick seems brittle because it is: while it works on all
past versions, there is no guarantee that it will continue to work on
future versions.

With that in mind, change the pg_dump query to instead use tableid on
versions in which tableid is available in pg_stats. This virtually
guarantees that the planner will use index lookups on pg_statistic,
eliminates the need for the "trick" qual mentioned above on those
versions, and is just simpler.
---
 src/bin/pg_dump/pg_dump.c | 102 +++++++++++++++++++++++++++++++-------
 src/bin/pg_dump/pg_dump.h |   1 +
 2 files changed, 84 insertions(+), 19 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8bde1b382de..5961277d4ca 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -7237,6 +7237,7 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages,
 		dobj->components |= DUMP_COMPONENT_STATISTICS;
 		dobj->name = pg_strdup(rel->name);
 		dobj->namespace = rel->namespace;
+		info->starelid = rel->catId.oid;
 		info->relpages = relpages;
 		info->reltuples = pstrdup(reltuples);
 		info->relallvisible = relallvisible;
@@ -11131,8 +11132,9 @@ static PGresult *
 fetchAttributeStats(Archive *fout)
 {
 	ArchiveHandle *AH = (ArchiveHandle *) fout;
-	PQExpBuffer nspnames = createPQExpBuffer();
-	PQExpBuffer relnames = createPQExpBuffer();
+	PQExpBuffer nspnames = NULL;
+	PQExpBuffer relnames = NULL;
+	PQExpBuffer starelids = NULL;
 	int			count = 0;
 	PGresult   *res = NULL;
 	static TocEntry *te;
@@ -11166,8 +11168,18 @@ fetchAttributeStats(Archive *fout)
 		restarted = true;
 	}
 
-	appendPQExpBufferChar(nspnames, '{');
-	appendPQExpBufferChar(relnames, '{');
+	if (fout->remoteVersion >= 190000)
+	{
+		starelids = createPQExpBuffer();
+		appendPQExpBufferChar(starelids, '{');
+	}
+	else
+	{
+		nspnames = createPQExpBuffer();
+		relnames = createPQExpBuffer();
+		appendPQExpBufferChar(nspnames, '{');
+		appendPQExpBufferChar(relnames, '{');
+	}
 
 	/*
 	 * Scan the TOC for the next set of relevant stats entries.  We assume
@@ -11180,14 +11192,35 @@ fetchAttributeStats(Archive *fout)
 		if ((te->reqs & REQ_STATS) != 0 &&
 			strcmp(te->desc, "STATISTICS DATA") == 0)
 		{
-			appendPGArray(nspnames, te->namespace);
-			appendPGArray(relnames, te->tag);
+			if (fout->remoteVersion >= 190000)
+			{
+				RelStatsInfo *rsinfo = (RelStatsInfo *) te->defnDumperArg;
+
+				if (rsinfo == NULL)
+					pg_fatal("statistics table oid information missing for %s.%s",
+							 te->namespace, te->tag);
+
+				if (count > 0)
+					appendPQExpBufferChar(starelids, ',');
+				appendPQExpBuffer(starelids, "%u", rsinfo->starelid);
+			}
+			else
+			{
+				appendPGArray(nspnames, te->namespace);
+				appendPGArray(relnames, te->tag);
+			}
+
 			count++;
 		}
 	}
 
-	appendPQExpBufferChar(nspnames, '}');
-	appendPQExpBufferChar(relnames, '}');
+	if (fout->remoteVersion >= 190000)
+		appendPQExpBufferChar(starelids, '}');
+	else
+	{
+		appendPQExpBufferChar(nspnames, '}');
+		appendPQExpBufferChar(relnames, '}');
+	}
 
 	/* Execute the query for the next batch of relations. */
 	if (count > 0)
@@ -11195,16 +11228,30 @@ fetchAttributeStats(Archive *fout)
 		PQExpBuffer query = createPQExpBuffer();
 
 		appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
-		appendStringLiteralAH(query, nspnames->data, fout);
-		appendPQExpBufferStr(query, "::pg_catalog.name[],");
-		appendStringLiteralAH(query, relnames->data, fout);
-		appendPQExpBufferStr(query, "::pg_catalog.name[])");
+		if (fout->remoteVersion >= 190000)
+		{
+			appendStringLiteralAH(query, starelids->data, fout);
+			appendPQExpBufferStr(query, "::pg_catalog.oid[])");
+		}
+		else
+		{
+			appendStringLiteralAH(query, nspnames->data, fout);
+			appendPQExpBufferStr(query, "::pg_catalog.name[],");
+			appendStringLiteralAH(query, relnames->data, fout);
+			appendPQExpBufferStr(query, "::pg_catalog.name[])");
+		}
+
 		res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
 		destroyPQExpBuffer(query);
 	}
 
-	destroyPQExpBuffer(nspnames);
-	destroyPQExpBuffer(relnames);
+	if (fout->remoteVersion >= 190000)
+		destroyPQExpBuffer(starelids);
+	else
+	{
+		destroyPQExpBuffer(nspnames);
+		destroyPQExpBuffer(relnames);
+	}
 	return res;
 }
 
@@ -11263,8 +11310,18 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te)
 	query = createPQExpBuffer();
 	if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
 	{
+		/*
+		 * Before v19, pg_stats did not expose the table's OID (tableid), so
+		 * for older servers we must identify tables by schemaname+relname.
+		 */
+		if (fout->remoteVersion >= 190000)
+			appendPQExpBufferStr(query,
+								 "PREPARE getAttributeStats(pg_catalog.oid[]) AS\n");
+		else
+			appendPQExpBufferStr(query,
+								 "PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n");
+
 		appendPQExpBufferStr(query,
-							 "PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n"
 							 "SELECT s.schemaname, s.tablename, s.attname, s.inherited, "
 							 "s.null_frac, s.avg_width, s.n_distinct, "
 							 "s.most_common_vals, s.most_common_freqs, "
@@ -11283,22 +11340,30 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te)
 								 "NULL AS range_empty_frac,"
 								 "NULL AS range_bounds_histogram ");
 
+		appendPQExpBufferStr(query, "FROM pg_catalog.pg_stats s ");
+
 		/*
 		 * The results must be in the order of the relations supplied in the
 		 * parameters to ensure we remain in sync as we walk through the TOC.
+		 *
 		 * The redundant filter clause on s.tablename = ANY(...) seems
 		 * sufficient to convince the planner to use
 		 * pg_class_relname_nsp_index, which avoids a full scan of pg_stats.
-		 * This may not work for all versions.
+		 * This seems to work for all versions prior to v19; from v19 onward
+		 * we join on the table OID instead, which is simpler.
 		 *
 		 * Our query for retrieving statistics for multiple relations uses
 		 * WITH ORDINALITY and multi-argument UNNEST(), both of which were
 		 * introduced in v9.4.  For older versions, we resort to gathering
 		 * statistics for a single relation at a time.
 		 */
-		if (fout->remoteVersion >= 90400)
+		if (fout->remoteVersion >= 190000)
+			appendPQExpBufferStr(query,
+								 "JOIN unnest($1) WITH ORDINALITY AS u (tableid, ord) "
+								 "ON s.tableid = u.tableid "
+								 "ORDER BY u.ord, s.attname, s.inherited");
+		else if (fout->remoteVersion >= 90400)
 			appendPQExpBufferStr(query,
-								 "FROM pg_catalog.pg_stats s "
 								 "JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) "
 								 "ON s.schemaname = u.schemaname "
 								 "AND s.tablename = u.tablename "
@@ -11306,7 +11371,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te)
 								 "ORDER BY u.ord, s.attname, s.inherited");
 		else
 			appendPQExpBufferStr(query,
-								 "FROM pg_catalog.pg_stats s "
 								 "WHERE s.schemaname = $1[1] "
 								 "AND s.tablename = $2[1] "
 								 "ORDER BY s.attname, s.inherited");
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 1c11a79083f..6fa248cc812 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -448,6 +448,7 @@ typedef struct _indexAttachInfo
 typedef struct _relStatsInfo
 {
 	DumpableObject dobj;
+	Oid			starelid;
 	int32		relpages;
 	char	   *reltuples;
 	int32		relallvisible;
-- 
2.50.1 (Apple Git-155)

