On Tue, 2024-03-26 at 00:16 +0100, Tomas Vondra wrote:
> I did take a closer look at v13 today. I have a bunch of comments and
> some minor whitespace fixes in the attached review patches.

I also attached a patch implementing a different approach to the
pg_dump support. Instead of trying to create a query that uses SQL
"format()" to create more SQL, I did all the formatting in C. It turned
out to be about 30% fewer lines, and I find it more understandable and
consistent with the way other stuff in pg_dump happens.

The attached patch is pretty rough -- not many comments, and perhaps
some things should be moved around. I only tested very basic
dump/reload in SQL format.

Regards,
        Jeff Davis

From 7ca575e5a02bf380af92b6144622468a501f7636 Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huin...@gmail.com>
Date: Sat, 16 Mar 2024 17:21:10 -0400
Subject: [PATCH vjeff] Enable dumping of table/index stats in pg_dump.

For each table/matview/index dumped, it will also generate a statement
that calls all of the pg_set_relation_stats() and
pg_set_attribute_stats() calls necessary to restore the statistics of
the current system onto the destination system.

As is the pattern with pg_dump options, this can be disabled with
--no-statistics.
---
 src/bin/pg_dump/pg_backup.h          |   2 +
 src/bin/pg_dump/pg_backup_archiver.c |   5 +
 src/bin/pg_dump/pg_dump.c            | 229 ++++++++++++++++++++++++++-
 src/bin/pg_dump/pg_dump.h            |   1 +
 src/bin/pg_dump/pg_dumpall.c         |   5 +
 src/bin/pg_dump/pg_restore.c         |   3 +
 6 files changed, 243 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 9ef2f2017e..1db5cf52eb 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -112,6 +112,7 @@ typedef struct _restoreOptions
 	int			no_publications;	/* Skip publication entries */
 	int			no_security_labels; /* Skip security label entries */
 	int			no_subscriptions;	/* Skip subscription entries */
+	int			no_statistics;		/* Skip statistics import */
 	int			strict_names;
 
 	const char *filename;
@@ -179,6 +180,7 @@ typedef struct _dumpOptions
 	int			no_security_labels;
 	int			no_publications;
 	int			no_subscriptions;
+	int			no_statistics;
 	int			no_toast_compression;
 	int			no_unlogged_table_data;
 	int			serializable_deferrable;
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index d97ebaff5b..d5f61399d9 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2833,6 +2833,10 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
 	if (ropt->no_subscriptions && strcmp(te->desc, "SUBSCRIPTION") == 0)
 		return 0;
 
+	/* If it's a stats dump, maybe ignore it */
+	if (ropt->no_statistics && strcmp(te->desc, "STATISTICS") == 0)
+		return 0;
+
 	/* Ignore it if section is not to be dumped/restored */
 	switch (curSection)
 	{
@@ -2862,6 +2866,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
 	 */
 	if (strcmp(te->desc, "ACL") == 0 ||
 		strcmp(te->desc, "COMMENT") == 0 ||
+		strcmp(te->desc, "STATISTICS") == 0 ||
 		strcmp(te->desc, "SECURITY LABEL") == 0)
 	{
 		/* Database properties react to createDB, not selectivity options. */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index b1c4c3ec7f..d483122998 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -428,6 +428,7 @@ main(int argc, char **argv)
 		{"no-comments", no_argument, &dopt.no_comments, 1},
 		{"no-publications", no_argument, &dopt.no_publications, 1},
 		{"no-security-labels", no_argument, &dopt.no_security_labels, 1},
+		{"no-statistics", no_argument, &dopt.no_statistics, 1},
 		{"no-subscriptions", no_argument, &dopt.no_subscriptions, 1},
 		{"no-toast-compression", no_argument, &dopt.no_toast_compression, 1},
 		{"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1},
@@ -1144,6 +1145,7 @@ help(const char *progname)
 	printf(_("  --no-comments                do not dump comments\n"));
 	printf(_("  --no-publications            do not dump publications\n"));
 	printf(_("  --no-security-labels         do not dump security label assignments\n"));
+	printf(_("  --no-statistics              do not dump statistics\n"));
 	printf(_("  --no-subscriptions           do not dump subscriptions\n"));
 	printf(_("  --no-table-access-method     do not dump table access methods\n"));
 	printf(_("  --no-tablespaces             do not dump tablespace assignments\n"));
@@ -7001,6 +7003,7 @@ getTables(Archive *fout, int *numTables)
 
 		/* Tables have data */
 		tblinfo[i].dobj.components |= DUMP_COMPONENT_DATA;
+		tblinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS;
 
 		/* Mark whether table has an ACL */
 		if (!PQgetisnull(res, i, i_relacl))
@@ -7498,6 +7501,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 			indxinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid));
 			indxinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid));
 			AssignDumpId(&indxinfo[j].dobj);
+			indxinfo[j].dobj.components |= DUMP_COMPONENT_STATISTICS;
 			indxinfo[j].dobj.dump = tbinfo->dobj.dump;
 			indxinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_indexname));
 			indxinfo[j].dobj.namespace = tbinfo->dobj.namespace;
@@ -10247,6 +10251,212 @@ dumpComment(Archive *fout, const char *type,
 						catalogId, subid, dumpId, NULL);
 }
 
+static const char *rel_stats_arginfo[][2] = {
+	{"relation", "regclass"},
+	{"relpages", "integer"},
+	{"reltuples", "real"},
+	{"relallvisible", "integer"},
+};
+
+static const char *att_stats_arginfo[][2] = {
+	{"relation", "regclass"},
+	{"attname", "name"},
+	{"inherited", "boolean"},
+	{"null_frac", "float4"},
+	{"avg_width", "integer"},
+	{"n_distinct", "float4"},
+	{"most_common_vals", "text"},
+	{"most_common_freqs", "float4[]"},
+	{"histogram_bounds", "text"},
+	{"correlation", "float4"},
+	{"most_common_elems", "text"},
+	{"most_common_elem_freqs", "float4[]"},
+	{"elem_count_histogram", "float4[]"},
+	{"range_length_histogram", "text"},
+	{"range_empty_frac", "float4"},
+	{"range_bounds_histogram", "text"},
+};
+
+
+static void
+getRelStatsExportQuery(PQExpBuffer query, Archive *fout,
+					   const char *schemaname, const char *tablename)
+{
+	resetPQExpBuffer(query);
+	appendPQExpBufferStr(query,
+						 "SELECT oid::regclass as relation, relpages, "
+						 "reltuples, relallvisible "
+						 "FROM pg_class "
+						 "WHERE relnamespace::regnamespace::name = ");
+	appendStringLiteralAH(query, schemaname, fout);
+	appendPQExpBufferStr(query, " AND relname = ");
+	appendStringLiteralAH(query, tablename, fout);
+}
+
+static void
+getAttStatsExportQuery(PQExpBuffer query, Archive *fout,
+					   const char *schemaname, const char *tablename)
+{
+	resetPQExpBuffer(query);
+	appendPQExpBufferStr(query,
+						 "SELECT c.oid::regclass AS relation, "
+						 "s.attname,"
+						 "s.inherited,"
+						 "s.null_frac,"
+						 "s.avg_width,"
+						 "s.n_distinct,"
+						 "s.most_common_vals,"
+						 "s.most_common_freqs,"
+						 "s.histogram_bounds,"
+						 "s.correlation,"
+						 "s.most_common_elems,"
+						 "s.most_common_elem_freqs,"
+						 "s.elem_count_histogram,");
+
+	if (fout->remoteVersion >= 170000)
+		appendPQExpBufferStr(query,
+							 "s.range_length_histogram,"
+							 "s.range_empty_frac,"
+							 "s.range_bounds_histogram ");
+	else
+		appendPQExpBufferStr(query,
+							 "NULL AS range_length_histogram,"
+							 "NULL AS range_empty_frac,"
+							 "NULL AS range_bounds_histogram ");
+
+	appendPQExpBufferStr(query,
+						 "FROM pg_stats s, pg_class c "
+						 "WHERE c.relnamespace::regnamespace::name = s.schemaname "
+						 "AND c.relname = s.tablename "
+						 "AND s.schemaname = ");
+	appendStringLiteralAH(query, schemaname, fout);
+	appendPQExpBufferStr(query, " AND s.tablename = ");
+	appendStringLiteralAH(query, tablename, fout);
+}
+
+static void
+appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
+					const char *argval, const char *argtype)
+{
+	appendPQExpBuffer(out, "\t%s => ", argname);
+	appendStringLiteralAH(out, argval, fout);
+	appendPQExpBuffer(out, "::%s", argtype);
+}
+
+static void
+appendRelStatsImport(PQExpBuffer out, Archive *fout, PGresult *res)
+{
+	const char *sep = "";
+
+	if (PQntuples(res) == 0)
+		return;
+
+	if (PQntuples(res) > 1)
+		pg_fatal("relation stats export returned %d rows, expected 1",
+				 PQntuples(res));
+
+	appendPQExpBufferStr(out, "SELECT pg_catalog.pg_set_relation_stats(\n");
+	for (int argno = 0; argno < lengthof(rel_stats_arginfo); argno++)
+	{
+		const char	*argname = rel_stats_arginfo[argno][0];
+		const char	*argtype = rel_stats_arginfo[argno][1];
+		int			 fieldno = PQfnumber(res, argname);
+
+		if (fieldno < 0)
+			pg_fatal("relation stats export query missing field '%s'",
+					 argname);
+
+		if (PQgetisnull(res, 0, fieldno))
+			continue;
+
+		appendPQExpBufferStr(out, sep);
+		appendNamedArgument(out, fout, argname,
+							PQgetvalue(res, 0, fieldno), argtype);
+		sep = ",\n";
+	}
+	appendPQExpBufferStr(out, "\n);\n");
+}
+
+static void
+appendAttStatsImport(PQExpBuffer out, Archive *fout, PGresult *res)
+{
+	for (int rownum = 0; rownum < PQntuples(res); rownum++)
+	{
+		const char *sep = "";
+		appendPQExpBufferStr(out, "SELECT pg_catalog.pg_set_attribute_stats(\n");
+		for (int argno = 0; argno < lengthof(att_stats_arginfo); argno++)
+		{
+			const char	*argname = att_stats_arginfo[argno][0];
+			const char	*argtype = att_stats_arginfo[argno][1];
+			int			 fieldno = PQfnumber(res, argname);
+
+			if (fieldno < 0)
+				pg_fatal("attribute stats export query missing field '%s'",
+						 argname);
+
+			if (PQgetisnull(res, rownum, fieldno))
+				continue;
+
+			appendPQExpBufferStr(out, sep);
+			appendNamedArgument(out, fout, argname,
+								PQgetvalue(res, rownum, fieldno), argtype);
+			sep = ",\n";
+		}
+		appendPQExpBufferStr(out, "\n);\n");
+	}
+}
+
+/*
+ * dumpRelationStats --
+ *
+ * Dump command to import stats into the relation on the new database.
+ */
+static void
+dumpRelationStats(Archive *fout, const DumpableObject *dobj,
+				  const char *reltypename, DumpId dumpid)
+{
+	PGresult   *res;
+	PQExpBuffer query;
+	PQExpBuffer out;
+	PQExpBuffer tag;
+
+	/* do nothing, if --no-statistics is supplied */
+	if (fout->dopt->no_statistics)
+		return;
+
+	tag = createPQExpBuffer();
+	appendPQExpBuffer(tag, "%s %s", reltypename,
+					  fmtId(dobj->name));
+
+	query = createPQExpBuffer();
+	out = createPQExpBuffer();
+
+	getRelStatsExportQuery(query, fout, dobj->namespace->dobj.name,
+						   dobj->name);
+	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+	appendRelStatsImport(out, fout, res);
+	PQclear(res);
+
+	getAttStatsExportQuery(query, fout, dobj->namespace->dobj.name,
+						   dobj->name);
+	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+	appendAttStatsImport(out, fout, res);
+	PQclear(res);
+
+	ArchiveEntry(fout, nilCatalogId, createDumpId(),
+				 ARCHIVE_OPTS(.tag = tag->data,
+							  .namespace = dobj->namespace->dobj.name,
+							  .description = "STATISTICS DATA",
+							  .section = SECTION_NONE,
+							  .createStmt = out->data,
+							  .deps = &dumpid,
+							  .nDeps = 1));
+
+	destroyPQExpBuffer(query);
+	destroyPQExpBuffer(tag);
+	destroyPQExpBuffer(out);
+}
+
 /*
  * dumpTableComment --
  *
@@ -16681,6 +16891,13 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
 	if (tbinfo->dobj.dump & DUMP_COMPONENT_SECLABEL)
 		dumpTableSecLabel(fout, tbinfo, reltypename);
 
+	/* Statistics are dependent on the definition, not the data */
+	/* Views don't have stats */
+	if ((tbinfo->dobj.dump & DUMP_COMPONENT_STATISTICS) &&
+		(tbinfo->relkind != RELKIND_VIEW))
+		dumpRelationStats(fout, &tbinfo->dobj, reltypename,
+						  tbinfo->dobj.dumpId);
+
 	/* Dump comments on inlined table constraints */
 	for (j = 0; j < tbinfo->ncheck; j++)
 	{
@@ -16882,6 +17099,7 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo)
 	PQExpBuffer delq;
 	char	   *qindxname;
 	char	   *qqindxname;
+	DumpId		dumpid;
 
 	/* Do nothing in data-only dump */
 	if (dopt->dataOnly)
@@ -16994,14 +17212,21 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo)
 		free(indstatvalsarray);
 	}
 
+	/* Comments and stats share same .dep */
+	dumpid = is_constraint ? indxinfo->indexconstraint :
+		indxinfo->dobj.dumpId;
+
 	/* Dump Index Comments */
 	if (indxinfo->dobj.dump & DUMP_COMPONENT_COMMENT)
 		dumpComment(fout, "INDEX", qindxname,
 					tbinfo->dobj.namespace->dobj.name,
 					tbinfo->rolname,
 					indxinfo->dobj.catId, 0,
-					is_constraint ? indxinfo->indexconstraint :
-					indxinfo->dobj.dumpId);
+					dumpid);
+
+	/* Dump Index Stats */
+	if (indxinfo->dobj.dump & DUMP_COMPONENT_STATISTICS)
+		dumpRelationStats(fout, &indxinfo->dobj, "INDEX", dumpid);
 
 	destroyPQExpBuffer(q);
 	destroyPQExpBuffer(delq);
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 9bc93520b4..d6a071ec28 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -101,6 +101,7 @@ typedef uint32 DumpComponents;
 #define DUMP_COMPONENT_ACL			(1 << 4)
 #define DUMP_COMPONENT_POLICY		(1 << 5)
 #define DUMP_COMPONENT_USERMAP		(1 << 6)
+#define DUMP_COMPONENT_STATISTICS	(1 << 7)
 #define DUMP_COMPONENT_ALL			(0xFFFF)
 
 /*
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index 046c0dc3b3..69652aa205 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -105,6 +105,7 @@ static int	use_setsessauth = 0;
 static int	no_comments = 0;
 static int	no_publications = 0;
 static int	no_security_labels = 0;
+static int	no_statistics = 0;
 static int	no_subscriptions = 0;
 static int	no_toast_compression = 0;
 static int	no_unlogged_table_data = 0;
@@ -174,6 +175,7 @@ main(int argc, char *argv[])
 		{"no-role-passwords", no_argument, &no_role_passwords, 1},
 		{"no-security-labels", no_argument, &no_security_labels, 1},
 		{"no-subscriptions", no_argument, &no_subscriptions, 1},
+		{"no-statistics", no_argument, &no_statistics, 1},
 		{"no-sync", no_argument, NULL, 4},
 		{"no-toast-compression", no_argument, &no_toast_compression, 1},
 		{"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1},
@@ -453,6 +455,8 @@ main(int argc, char *argv[])
 		appendPQExpBufferStr(pgdumpopts, " --no-publications");
 	if (no_security_labels)
 		appendPQExpBufferStr(pgdumpopts, " --no-security-labels");
+	if (no_statistics)
+		appendPQExpBufferStr(pgdumpopts, " --no-statistics");
 	if (no_subscriptions)
 		appendPQExpBufferStr(pgdumpopts, " --no-subscriptions");
 	if (no_toast_compression)
@@ -668,6 +672,7 @@ help(void)
 	printf(_("  --no-publications            do not dump publications\n"));
 	printf(_("  --no-role-passwords          do not dump passwords for roles\n"));
 	printf(_("  --no-security-labels         do not dump security label assignments\n"));
+	printf(_("  --no-statistics              do not dump statistics\n"));
 	printf(_("  --no-subscriptions           do not dump subscriptions\n"));
 	printf(_("  --no-sync                    do not wait for changes to be written safely to disk\n"));
 	printf(_("  --no-table-access-method     do not dump table access methods\n"));
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index c3beacdec1..2d326dec72 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -75,6 +75,7 @@ main(int argc, char **argv)
 	static int	no_publications = 0;
 	static int	no_security_labels = 0;
 	static int	no_subscriptions = 0;
+	static int  no_statistics = 0;
 	static int	strict_names = 0;
 
 	struct option cmdopts[] = {
@@ -126,6 +127,7 @@ main(int argc, char **argv)
 		{"no-security-labels", no_argument, &no_security_labels, 1},
 		{"no-subscriptions", no_argument, &no_subscriptions, 1},
 		{"filter", required_argument, NULL, 4},
+		{"no-statistics", no_argument, &no_statistics, 1},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -358,6 +360,7 @@ main(int argc, char **argv)
 	opts->no_publications = no_publications;
 	opts->no_security_labels = no_security_labels;
 	opts->no_subscriptions = no_subscriptions;
+	opts->no_statistics = no_statistics;
 
 	if (if_exists && !opts->dropSchema)
 		pg_fatal("option --if-exists requires option -c/--clean");
-- 
2.34.1

Reply via email to