On Tue, 2024-03-26 at 00:16 +0100, Tomas Vondra wrote:
> I did take a closer look at v13 today. I have a bunch of comments and
> some minor whitespace fixes in the attached review patches.
I also attached a patch implementing a different approach to the pg_dump
support. Instead of trying to create a query that uses SQL "format()" to
create more SQL, I did all the formatting in C. It turned out to be about
30% fewer lines, and I find it more understandable and consistent with
the way other stuff in pg_dump happens.

The attached patch is pretty rough -- not many comments, and perhaps some
things should be moved around. I only tested very basic dump/reload in
SQL format.

Regards,
	Jeff Davis
From 7ca575e5a02bf380af92b6144622468a501f7636 Mon Sep 17 00:00:00 2001 From: Corey Huinker <corey.huin...@gmail.com> Date: Sat, 16 Mar 2024 17:21:10 -0400 Subject: [PATCH vjeff] Enable dumping of table/index stats in pg_dump. For each table/matview/index dumped, it will also generate a statement that calls all of the pg_set_relation_stats() and pg_set_attribute_stats() calls necessary to restore the statistics of the current system onto the destination system. As is the pattern with pg_dump options, this can be disabled with --no-statistics. --- src/bin/pg_dump/pg_backup.h | 2 + src/bin/pg_dump/pg_backup_archiver.c | 5 + src/bin/pg_dump/pg_dump.c | 229 ++++++++++++++++++++++++++- src/bin/pg_dump/pg_dump.h | 1 + src/bin/pg_dump/pg_dumpall.c | 5 + src/bin/pg_dump/pg_restore.c | 3 + 6 files changed, 243 insertions(+), 2 deletions(-) diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 9ef2f2017e..1db5cf52eb 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -112,6 +112,7 @@ typedef struct _restoreOptions int no_publications; /* Skip publication entries */ int no_security_labels; /* Skip security label entries */ int no_subscriptions; /* Skip subscription entries */ + int no_statistics; /* Skip statistics import */ int strict_names; const char *filename; @@ -179,6 +180,7 @@ typedef struct _dumpOptions int no_security_labels; int no_publications; int no_subscriptions; + int no_statistics; int no_toast_compression; int no_unlogged_table_data; int serializable_deferrable; diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index d97ebaff5b..d5f61399d9 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -2833,6 +2833,10 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) if (ropt->no_subscriptions && strcmp(te->desc, "SUBSCRIPTION") == 0) return 0; + /* If it's a stats dump, maybe ignore it */ + if (ropt->no_statistics 
&& strcmp(te->desc, "STATISTICS") == 0) + return 0; + /* Ignore it if section is not to be dumped/restored */ switch (curSection) { @@ -2862,6 +2866,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) */ if (strcmp(te->desc, "ACL") == 0 || strcmp(te->desc, "COMMENT") == 0 || + strcmp(te->desc, "STATISTICS") == 0 || strcmp(te->desc, "SECURITY LABEL") == 0) { /* Database properties react to createDB, not selectivity options. */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b1c4c3ec7f..d483122998 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -428,6 +428,7 @@ main(int argc, char **argv) {"no-comments", no_argument, &dopt.no_comments, 1}, {"no-publications", no_argument, &dopt.no_publications, 1}, {"no-security-labels", no_argument, &dopt.no_security_labels, 1}, + {"no-statistics", no_argument, &dopt.no_statistics, 1}, {"no-subscriptions", no_argument, &dopt.no_subscriptions, 1}, {"no-toast-compression", no_argument, &dopt.no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1}, @@ -1144,6 +1145,7 @@ help(const char *progname) printf(_(" --no-comments do not dump comments\n")); printf(_(" --no-publications do not dump publications\n")); printf(_(" --no-security-labels do not dump security label assignments\n")); + printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); printf(_(" --no-table-access-method do not dump table access methods\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); @@ -7001,6 +7003,7 @@ getTables(Archive *fout, int *numTables) /* Tables have data */ tblinfo[i].dobj.components |= DUMP_COMPONENT_DATA; + tblinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS; /* Mark whether table has an ACL */ if (!PQgetisnull(res, i, i_relacl)) @@ -7498,6 +7501,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) indxinfo[j].dobj.catId.tableoid 
= atooid(PQgetvalue(res, j, i_tableoid)); indxinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid)); AssignDumpId(&indxinfo[j].dobj); + indxinfo[j].dobj.components |= DUMP_COMPONENT_STATISTICS; indxinfo[j].dobj.dump = tbinfo->dobj.dump; indxinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_indexname)); indxinfo[j].dobj.namespace = tbinfo->dobj.namespace; @@ -10247,6 +10251,212 @@ dumpComment(Archive *fout, const char *type, catalogId, subid, dumpId, NULL); } +static const char *rel_stats_arginfo[][2] = { + {"relation", "regclass"}, + {"relpages", "integer"}, + {"reltuples", "real"}, + {"relallvisible", "integer"}, +}; + +static const char *att_stats_arginfo[][2] = { + {"relation", "regclass"}, + {"attname", "name"}, + {"inherited", "boolean"}, + {"null_frac", "float4"}, + {"avg_width", "integer"}, + {"n_distinct", "float4"}, + {"most_common_vals", "text"}, + {"most_common_freqs", "float4[]"}, + {"histogram_bounds", "text"}, + {"correlation", "float4"}, + {"most_common_elems", "text"}, + {"most_common_elem_freqs", "float4[]"}, + {"elem_count_histogram", "float4[]"}, + {"range_length_histogram", "text"}, + {"range_empty_frac", "float4"}, + {"range_bounds_histogram", "text"}, +}; + + +static void +getRelStatsExportQuery(PQExpBuffer query, Archive *fout, + const char *schemaname, const char *tablename) +{ + resetPQExpBuffer(query); + appendPQExpBufferStr(query, + "SELECT oid::regclass as relation, relpages, " + "reltuples, relallvisible " + "FROM pg_class " + "WHERE relnamespace::regnamespace::name = "); + appendStringLiteralAH(query, schemaname, fout); + appendPQExpBufferStr(query, " AND relname = "); + appendStringLiteralAH(query, tablename, fout); +} + +static void +getAttStatsExportQuery(PQExpBuffer query, Archive *fout, + const char *schemaname, const char *tablename) +{ + resetPQExpBuffer(query); + appendPQExpBufferStr(query, + "SELECT c.oid::regclass AS relation, " + "s.attname," + "s.inherited," + "s.null_frac," + "s.avg_width," + "s.n_distinct," + 
"s.most_common_vals," + "s.most_common_freqs," + "s.histogram_bounds," + "s.correlation," + "s.most_common_elems," + "s.most_common_elem_freqs," + "s.elem_count_histogram,"); + + if (fout->remoteVersion >= 170000) + appendPQExpBufferStr(query, + "s.range_length_histogram," + "s.range_empty_frac," + "s.range_bounds_histogram "); + else + appendPQExpBufferStr(query, + "NULL AS range_length_histogram," + "NULL AS range_empty_frac," + "NULL AS range_bounds_histogram "); + + appendPQExpBufferStr(query, + "FROM pg_stats s, pg_class c " + "WHERE c.relnamespace::regnamespace::name = s.schemaname " + "AND c.relname = s.tablename " + "AND s.schemaname = "); + appendStringLiteralAH(query, schemaname, fout); + appendPQExpBufferStr(query, " AND s.tablename = "); + appendStringLiteralAH(query, tablename, fout); +} + +static void +appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, + const char *argval, const char *argtype) +{ + appendPQExpBuffer(out, "\t%s => ", argname); + appendStringLiteralAH(out, argval, fout); + appendPQExpBuffer(out, "::%s", argtype); +} + +static void +appendRelStatsImport(PQExpBuffer out, Archive *fout, PGresult *res) +{ + const char *sep = ""; + + if (PQntuples(res) == 0) + return; + + if (PQntuples(res) > 1) + pg_fatal("relation stats export returned %d rows, expected 1", + PQntuples(res)); + + appendPQExpBufferStr(out, "SELECT pg_catalog.pg_set_relation_stats(\n"); + for (int argno = 0; argno < lengthof(rel_stats_arginfo); argno++) + { + const char *argname = rel_stats_arginfo[argno][0]; + const char *argtype = rel_stats_arginfo[argno][1]; + int fieldno = PQfnumber(res, argname); + + if (fieldno < 0) + pg_fatal("relation stats export query missing field '%s'", + argname); + + if (PQgetisnull(res, 0, fieldno)) + continue; + + appendPQExpBufferStr(out, sep); + appendNamedArgument(out, fout, argname, + PQgetvalue(res, 0, fieldno), argtype); + sep = ",\n"; + } + appendPQExpBufferStr(out, "\n);\n"); +} + +static void 
+appendAttStatsImport(PQExpBuffer out, Archive *fout, PGresult *res) +{ + for (int rownum = 0; rownum < PQntuples(res); rownum++) + { + const char *sep = ""; + appendPQExpBufferStr(out, "SELECT pg_catalog.pg_set_attribute_stats(\n"); + for (int argno = 0; argno < lengthof(att_stats_arginfo); argno++) + { + const char *argname = att_stats_arginfo[argno][0]; + const char *argtype = att_stats_arginfo[argno][1]; + int fieldno = PQfnumber(res, argname); + + if (fieldno < 0) + pg_fatal("attribute stats export query missing field '%s'", + argname); + + if (PQgetisnull(res, rownum, fieldno)) + continue; + + appendPQExpBufferStr(out, sep); + appendNamedArgument(out, fout, argname, + PQgetvalue(res, rownum, fieldno), argtype); + sep = ",\n"; + } + appendPQExpBufferStr(out, "\n);\n"); + } +} + +/* + * dumpRelationStats -- + * + * Dump command to import stats into the relation on the new database. + */ +static void +dumpRelationStats(Archive *fout, const DumpableObject *dobj, + const char *reltypename, DumpId dumpid) +{ + PGresult *res; + PQExpBuffer query; + PQExpBuffer out; + PQExpBuffer tag; + + /* do nothing, if --no-statistics is supplied */ + if (fout->dopt->no_statistics) + return; + + tag = createPQExpBuffer(); + appendPQExpBuffer(tag, "%s %s", reltypename, + fmtId(dobj->name)); + + query = createPQExpBuffer(); + out = createPQExpBuffer(); + + getRelStatsExportQuery(query, fout, dobj->namespace->dobj.name, + dobj->name); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + appendRelStatsImport(out, fout, res); + PQclear(res); + + getAttStatsExportQuery(query, fout, dobj->namespace->dobj.name, + dobj->name); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + appendAttStatsImport(out, fout, res); + PQclear(res); + + ArchiveEntry(fout, nilCatalogId, createDumpId(), + ARCHIVE_OPTS(.tag = tag->data, + .namespace = dobj->namespace->dobj.name, + .description = "STATISTICS DATA", + .section = SECTION_NONE, + .createStmt = out->data, + .deps = &dumpid, + 
.nDeps = 1)); + + destroyPQExpBuffer(query); + destroyPQExpBuffer(tag); + destroyPQExpBuffer(out); +} + /* * dumpTableComment -- * @@ -16681,6 +16891,13 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) if (tbinfo->dobj.dump & DUMP_COMPONENT_SECLABEL) dumpTableSecLabel(fout, tbinfo, reltypename); + /* Statistics are dependent on the definition, not the data */ + /* Views don't have stats */ + if ((tbinfo->dobj.dump & DUMP_COMPONENT_STATISTICS) && + (tbinfo->relkind != RELKIND_VIEW)) + dumpRelationStats(fout, &tbinfo->dobj, reltypename, + tbinfo->dobj.dumpId); + /* Dump comments on inlined table constraints */ for (j = 0; j < tbinfo->ncheck; j++) { @@ -16882,6 +17099,7 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo) PQExpBuffer delq; char *qindxname; char *qqindxname; + DumpId dumpid; /* Do nothing in data-only dump */ if (dopt->dataOnly) @@ -16994,14 +17212,21 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo) free(indstatvalsarray); } + /* Comments and stats share same .dep */ + dumpid = is_constraint ? indxinfo->indexconstraint : + indxinfo->dobj.dumpId; + /* Dump Index Comments */ if (indxinfo->dobj.dump & DUMP_COMPONENT_COMMENT) dumpComment(fout, "INDEX", qindxname, tbinfo->dobj.namespace->dobj.name, tbinfo->rolname, indxinfo->dobj.catId, 0, - is_constraint ? 
indxinfo->indexconstraint : - indxinfo->dobj.dumpId); + dumpid); + + /* Dump Index Stats */ + if (indxinfo->dobj.dump & DUMP_COMPONENT_STATISTICS) + dumpRelationStats(fout, &indxinfo->dobj, "INDEX", dumpid); destroyPQExpBuffer(q); destroyPQExpBuffer(delq); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 9bc93520b4..d6a071ec28 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -101,6 +101,7 @@ typedef uint32 DumpComponents; #define DUMP_COMPONENT_ACL (1 << 4) #define DUMP_COMPONENT_POLICY (1 << 5) #define DUMP_COMPONENT_USERMAP (1 << 6) +#define DUMP_COMPONENT_STATISTICS (1 << 7) #define DUMP_COMPONENT_ALL (0xFFFF) /* diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 046c0dc3b3..69652aa205 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -105,6 +105,7 @@ static int use_setsessauth = 0; static int no_comments = 0; static int no_publications = 0; static int no_security_labels = 0; +static int no_statistics = 0; static int no_subscriptions = 0; static int no_toast_compression = 0; static int no_unlogged_table_data = 0; @@ -174,6 +175,7 @@ main(int argc, char *argv[]) {"no-role-passwords", no_argument, &no_role_passwords, 1}, {"no-security-labels", no_argument, &no_security_labels, 1}, {"no-subscriptions", no_argument, &no_subscriptions, 1}, + {"no-statistics", no_argument, &no_statistics, 1}, {"no-sync", no_argument, NULL, 4}, {"no-toast-compression", no_argument, &no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1}, @@ -453,6 +455,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --no-publications"); if (no_security_labels) appendPQExpBufferStr(pgdumpopts, " --no-security-labels"); + if (no_statistics) + appendPQExpBufferStr(pgdumpopts, " --no-statistics"); if (no_subscriptions) appendPQExpBufferStr(pgdumpopts, " --no-subscriptions"); if (no_toast_compression) @@ -668,6 +672,7 @@ help(void) printf(_(" 
--no-publications do not dump publications\n")); printf(_(" --no-role-passwords do not dump passwords for roles\n")); printf(_(" --no-security-labels do not dump security label assignments\n")); + printf(_(" --no-statistics do not dump statistics\n")); printf(_(" --no-subscriptions do not dump subscriptions\n")); printf(_(" --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" --no-table-access-method do not dump table access methods\n")); diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index c3beacdec1..2d326dec72 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -75,6 +75,7 @@ main(int argc, char **argv) static int no_publications = 0; static int no_security_labels = 0; static int no_subscriptions = 0; + static int no_statistics = 0; static int strict_names = 0; struct option cmdopts[] = { @@ -126,6 +127,7 @@ main(int argc, char **argv) {"no-security-labels", no_argument, &no_security_labels, 1}, {"no-subscriptions", no_argument, &no_subscriptions, 1}, {"filter", required_argument, NULL, 4}, + {"no-statistics", no_argument, &no_statistics, 1}, {NULL, 0, NULL, 0} }; @@ -358,6 +360,7 @@ main(int argc, char **argv) opts->no_publications = no_publications; opts->no_security_labels = no_security_labels; opts->no_subscriptions = no_subscriptions; + opts->no_statistics = no_statistics; if (if_exists && !opts->dropSchema) pg_fatal("option --if-exists requires option -c/--clean"); -- 2.34.1