On Mon, 2025-02-24 at 09:54 -0500, Andres Freund wrote: > Have you compared performance of with/without stats after these > optimizations?
On unoptimized build with asserts enabled, dumping the regression database: --no-statistics: 1.0s master: 3.6s v3j-0001: 3.0s v3j-0002: 1.7s I plan to commit the patches soon. Regards, Jeff Davis
From d617fb142158e0ca964e5bc8bb3351d993de6062 Mon Sep 17 00:00:00 2001 From: Corey Huinker <corey.huin...@gmail.com> Date: Fri, 21 Feb 2025 23:31:04 -0500 Subject: [PATCH v3j 1/2] Avoid unnecessary relation stats query in pg_dump. The few fields we need can be easily collected in getTables() and getIndexes() and stored in RelStatsInfo. Co-authored-by: Corey Huinker <corey.huin...@gmail.com> Co-authored-by: Jeff Davis <pg...@j-davis.com> Discussion: https://postgr.es/m/CADkLM=f0a43atd88xw4xcfayef25g-7htrhx_whv40hyocs...@mail.gmail.com --- src/bin/pg_dump/pg_dump.c | 145 ++++++++++++++++---------------------- src/bin/pg_dump/pg_dump.h | 5 +- 2 files changed, 64 insertions(+), 86 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index afd79287177..a1823914656 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -56,6 +56,7 @@ #include "common/connect.h" #include "common/int.h" #include "common/relpath.h" +#include "common/shortest_dec.h" #include "compress_io.h" #include "dumputils.h" #include "fe_utils/option_utils.h" @@ -524,6 +525,9 @@ main(int argc, char **argv) pg_logging_set_level(PG_LOG_WARNING); set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_dump")); + /* ensure that locale does not affect floating point interpretation */ + setlocale(LC_NUMERIC, "C"); + /* * Initialize what we need for parallel execution, especially for thread * support on Windows. @@ -6814,7 +6818,8 @@ getFuncs(Archive *fout) * */ static RelStatsInfo * -getRelationStatistics(Archive *fout, DumpableObject *rel, char relkind) +getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, + float reltuples, int32 relallvisible, char relkind) { if (!fout->dopt->dumpStatistics) return NULL; @@ -6839,6 +6844,9 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, char relkind) dobj->components |= DUMP_COMPONENT_STATISTICS; dobj->name = pg_strdup(rel->name); dobj->namespace = rel->namespace; + info->relpages = relpages; + info->reltuples = reltuples; + info->relallvisible = relallvisible; info->relkind = relkind; info->postponed_def = false; @@ -6874,6 +6882,8 @@ getTables(Archive *fout, int *numTables) int i_relhasindex; int i_relhasrules; int i_relpages; + int i_reltuples; + int i_relallvisible; int i_toastpages; int i_owning_tab; int i_owning_col; @@ -6924,7 +6934,7 @@ getTables(Archive *fout, int *numTables) "c.relowner, " "c.relchecks, " "c.relhasindex, c.relhasrules, c.relpages, " - "c.relhastriggers, " + "c.reltuples, c.relallvisible, c.relhastriggers, " "c.relpersistence, " "c.reloftype, " "c.relacl, " @@ -7088,6 +7098,8 @@ getTables(Archive *fout, int *numTables) i_relhasindex = PQfnumber(res, "relhasindex"); i_relhasrules = PQfnumber(res, "relhasrules"); i_relpages = PQfnumber(res, "relpages"); + i_reltuples = PQfnumber(res, "reltuples"); + i_relallvisible = PQfnumber(res, "relallvisible"); i_toastpages = PQfnumber(res, "toastpages"); i_owning_tab = PQfnumber(res, "owning_tab"); i_owning_col = PQfnumber(res, "owning_col"); @@ -7134,6 +7146,9 @@ getTables(Archive *fout, int *numTables) for (i = 0; i < ntups; i++) { + float reltuples = strtof(PQgetvalue(res, i, i_reltuples), NULL); + int32 relallvisible = atoi(PQgetvalue(res, i, i_relallvisible)); + tblinfo[i].dobj.objType = DO_TABLE; tblinfo[i].dobj.catId.tableoid = atooid(PQgetvalue(res, i, i_reltableoid)); tblinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_reloid)); @@ -7233,7 +7248,8 @@ getTables(Archive *fout, int *numTables) /* Add statistics */ if (tblinfo[i].interesting) - getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relkind); + getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relpages, + reltuples, relallvisible, tblinfo[i].relkind); /* * Read-lock target tables to make sure they aren't DROPPED or altered @@ -7499,6 +7515,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_oid, i_indrelid, i_indexname, + i_relpages, + i_reltuples, + i_relallvisible, i_parentidx, i_indexdef, i_indnkeyatts, @@ -7552,6 +7571,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) appendPQExpBufferStr(query, "SELECT t.tableoid, t.oid, i.indrelid, " "t.relname AS indexname, " + "t.relpages, t.reltuples, t.relallvisible, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " "i.indkey, i.indisclustered, " "c.contype, c.conname, " @@ -7659,6 +7679,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_oid = PQfnumber(res, "oid"); i_indrelid = PQfnumber(res, "indrelid"); i_indexname = PQfnumber(res, "indexname"); + i_relpages = PQfnumber(res, "relpages"); + i_reltuples = PQfnumber(res, "reltuples"); + i_relallvisible = PQfnumber(res, "relallvisible"); i_parentidx = PQfnumber(res, "parentidx"); i_indexdef = PQfnumber(res, "indexdef"); i_indnkeyatts = PQfnumber(res, "indnkeyatts"); @@ -7725,6 +7748,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) char contype; char indexkind; RelStatsInfo *relstats; + int32 relpages = atoi(PQgetvalue(res, j, i_relpages)); + float reltuples = strtof(PQgetvalue(res, j, i_reltuples), NULL); + int32 relallvisible = atoi(PQgetvalue(res, j, i_relallvisible)); indxinfo[j].dobj.objType = DO_INDEX; indxinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid)); @@ -7759,7 +7785,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) indexkind = RELKIND_PARTITIONED_INDEX; contype = *(PQgetvalue(res, j, i_contype)); - relstats = getRelationStatistics(fout, &indxinfo[j].dobj, indexkind); + relstats = getRelationStatistics(fout, &indxinfo[j].dobj, relpages, + reltuples, relallvisible, indexkind); if (contype == 'p' || contype == 'u' || contype == 'x') { @@ -10383,18 +10410,6 @@ dumpComment(Archive *fout, const char *type, catalogId, subid, dumpId, NULL); } -/* - * Tabular description of the parameters to pg_restore_relation_stats() - * param_name, param_type - */ -static const char *rel_stats_arginfo[][2] = { - {"relation", "regclass"}, - {"version", "integer"}, - {"relpages", "integer"}, - {"reltuples", "real"}, - {"relallvisible", "integer"}, -}; - /* * Tabular description of the parameters to pg_restore_attribute_stats() * param_name, param_type @@ -10419,30 +10434,6 @@ static const char *att_stats_arginfo[][2] = { {"range_bounds_histogram", "text"}, }; -/* - * getRelStatsExportQuery -- - * - * Generate a query that will fetch all relation (e.g. pg_class) - * stats for a given relation. - */ -static void -getRelStatsExportQuery(PQExpBuffer query, Archive *fout, - const char *schemaname, const char *relname) -{ - resetPQExpBuffer(query); - appendPQExpBufferStr(query, - "SELECT c.oid::regclass AS relation, " - "current_setting('server_version_num') AS version, " - "c.relpages, c.reltuples, c.relallvisible " - "FROM pg_class c " - "JOIN pg_namespace n " - "ON n.oid = c.relnamespace " - "WHERE n.nspname = "); - appendStringLiteralAH(query, schemaname, fout); - appendPQExpBufferStr(query, " AND c.relname = "); - appendStringLiteralAH(query, relname, fout); -} - /* * getAttStatsExportQuery -- * @@ -10454,21 +10445,22 @@ getAttStatsExportQuery(PQExpBuffer query, Archive *fout, const char *schemaname, const char *relname) { resetPQExpBuffer(query); - appendPQExpBufferStr(query, - "SELECT c.oid::regclass AS relation, " - "s.attname," - "s.inherited," - "current_setting('server_version_num') AS version, " - "s.null_frac," - "s.avg_width," - "s.n_distinct," - "s.most_common_vals," - "s.most_common_freqs," - "s.histogram_bounds," - "s.correlation," - "s.most_common_elems," - "s.most_common_elem_freqs," - "s.elem_count_histogram,"); + appendPQExpBuffer(query, + "SELECT c.oid::regclass AS relation, " + "s.attname," + "s.inherited," + "'%u'::integer AS version, " + "s.null_frac," + "s.avg_width," + "s.n_distinct," + "s.most_common_vals," + "s.most_common_freqs," + "s.histogram_bounds," + "s.correlation," + "s.most_common_elems," + "s.most_common_elem_freqs," + "s.elem_count_histogram,", + fout->remoteVersion); if (fout->remoteVersion >= 170000) appendPQExpBufferStr(query, @@ -10521,34 +10513,21 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, * Append a formatted pg_restore_relation_stats statement. */ static void -appendRelStatsImport(PQExpBuffer out, Archive *fout, PGresult *res) +appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo) { - const char *sep = ""; + const char *qualname = fmtQualifiedId(rsinfo->dobj.namespace->dobj.name, rsinfo->dobj.name); + char reltuples_str[FLOAT_SHORTEST_DECIMAL_LEN]; - if (PQntuples(res) == 0) - return; + float_to_shortest_decimal_buf(rsinfo->reltuples, reltuples_str); appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n"); - - for (int argno = 0; argno < lengthof(rel_stats_arginfo); argno++) - { - const char *argname = rel_stats_arginfo[argno][0]; - const char *argtype = rel_stats_arginfo[argno][1]; - int fieldno = PQfnumber(res, argname); - - if (fieldno < 0) - pg_fatal("relation stats export query missing field '%s'", - argname); - - if (PQgetisnull(res, 0, fieldno)) - continue; - - appendPQExpBufferStr(out, sep); - appendNamedArgument(out, fout, argname, PQgetvalue(res, 0, fieldno), argtype); - - sep = ",\n"; - } - appendPQExpBufferStr(out, "\n);\n"); + appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n", qualname); + appendPQExpBuffer(out, "\t'version', '%u'::integer,\n", + fout->remoteVersion); + appendPQExpBuffer(out, "\t'relpages', '%d'::integer,\n", rsinfo->relpages); + appendPQExpBuffer(out, "\t'reltuples', '%s'::real,\n", reltuples_str); + appendPQExpBuffer(out, "\t'relallvisible', '%d'::integer\n);\n", + rsinfo->relallvisible); } /* @@ -10643,15 +10622,11 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) tag = createPQExpBuffer(); appendPQExpBufferStr(tag, fmtId(dobj->name)); - query = createPQExpBuffer(); out = createPQExpBuffer(); - getRelStatsExportQuery(query, fout, dobj->namespace->dobj.name, - dobj->name); - res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - appendRelStatsImport(out, fout, res); - PQclear(res); + appendRelStatsImport(out, fout, rsinfo); + query = createPQExpBuffer(); getAttStatsExportQuery(query, fout, dobj->namespace->dobj.name, dobj->name); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index f08f5905aa3..9d6a4857c4b 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -328,7 +328,7 @@ typedef struct _tableInfo Oid owning_tab; /* OID of table owning sequence */ int owning_col; /* attr # of column owning sequence */ bool is_identity_sequence; - int relpages; /* table's size in pages (from pg_class) */ + int32 relpages; /* table's size in pages (from pg_class) */ int toastpages; /* toast table's size in pages, if any */ bool interesting; /* true if need to collect more data */ @@ -438,6 +438,9 @@ typedef struct _indexAttachInfo typedef struct _relStatsInfo { DumpableObject dobj; + int32 relpages; + float reltuples; + int32 relallvisible; char relkind; /* 'r', 'm', 'i', etc */ bool postponed_def; /* stats must be postponed into post-data */ } RelStatsInfo; -- 2.34.1
From 33cde5fdbb207a99bce678015e1e45df0210bb56 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Tue, 25 Feb 2025 17:15:47 -0800 Subject: [PATCH v3j 2/2] pg_dump: prepare attribute stats query. Follow precedent in pg_dump for preparing queries to improve performance. Also, simplify the query by removing unnecessary joins. Reported-by: Andres Freund <and...@anarazel.de> Co-authored-by: Corey Huinker <corey.huin...@gmail.com> Co-authored-by: Jeff Davis <pg...@j-davis.com> Discussion: https://postgr.es/m/CADkLM=drmc6t8gp9gvf6y6e_r5echqjmaah_vpyih_zmiq0...@mail.gmail.com --- src/bin/pg_dump/pg_backup.h | 1 + src/bin/pg_dump/pg_dump.c | 123 +++++++++++++++++------------------- 2 files changed, 58 insertions(+), 66 deletions(-) diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 350cf659c41..e783cc68d89 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -72,6 +72,7 @@ enum _dumpPreparedQueries PREPQUERY_DUMPOPR, PREPQUERY_DUMPRANGETYPE, PREPQUERY_DUMPTABLEATTACH, + PREPQUERY_GETATTRIBUTESTATS, PREPQUERY_GETCOLUMNACLS, PREPQUERY_GETDOMAINCONSTRAINTS, }; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index a1823914656..0de6c959bb0 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -10415,10 +10415,8 @@ dumpComment(Archive *fout, const char *type, * param_name, param_type */ static const char *att_stats_arginfo[][2] = { - {"relation", "regclass"}, {"attname", "name"}, {"inherited", "boolean"}, - {"version", "integer"}, {"null_frac", "float4"}, {"avg_width", "integer"}, {"n_distinct", "float4"}, @@ -10434,60 +10432,6 @@ static const char *att_stats_arginfo[][2] = { {"range_bounds_histogram", "text"}, }; -/* - * getAttStatsExportQuery -- - * - * Generate a query that will fetch all attribute (e.g. pg_statistic) - * stats for a given relation. - */ -static void -getAttStatsExportQuery(PQExpBuffer query, Archive *fout, - const char *schemaname, const char *relname) -{ - resetPQExpBuffer(query); - appendPQExpBuffer(query, - "SELECT c.oid::regclass AS relation, " - "s.attname," - "s.inherited," - "'%u'::integer AS version, " - "s.null_frac," - "s.avg_width," - "s.n_distinct," - "s.most_common_vals," - "s.most_common_freqs," - "s.histogram_bounds," - "s.correlation," - "s.most_common_elems," - "s.most_common_elem_freqs," - "s.elem_count_histogram,", - fout->remoteVersion); - - if (fout->remoteVersion >= 170000) - appendPQExpBufferStr(query, - "s.range_length_histogram," - "s.range_empty_frac," - "s.range_bounds_histogram "); - else - appendPQExpBufferStr(query, - "NULL AS range_length_histogram," - "NULL AS range_empty_frac," - "NULL AS range_bounds_histogram "); - - appendPQExpBufferStr(query, - "FROM pg_stats s " - "JOIN pg_namespace n " - "ON n.nspname = s.schemaname " - "JOIN pg_class c " - "ON c.relname = s.tablename " - "AND c.relnamespace = n.oid " - "WHERE s.schemaname = "); - appendStringLiteralAH(query, schemaname, fout); - appendPQExpBufferStr(query, " AND s.tablename = "); - appendStringLiteralAH(query, relname, fout); - appendPQExpBufferStr(query, " ORDER BY s.attname, s.inherited"); -} - - /* * appendNamedArgument -- * @@ -10513,17 +10457,17 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, * Append a formatted pg_restore_relation_stats statement. */ static void -appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo) +appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo, + const char *qualified_name) { - const char *qualname = fmtQualifiedId(rsinfo->dobj.namespace->dobj.name, rsinfo->dobj.name); char reltuples_str[FLOAT_SHORTEST_DECIMAL_LEN]; float_to_shortest_decimal_buf(rsinfo->reltuples, reltuples_str); appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n"); - appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n", qualname); appendPQExpBuffer(out, "\t'version', '%u'::integer,\n", fout->remoteVersion); + appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n", qualified_name); appendPQExpBuffer(out, "\t'relpages', '%d'::integer,\n", rsinfo->relpages); appendPQExpBuffer(out, "\t'reltuples', '%s'::real,\n", reltuples_str); appendPQExpBuffer(out, "\t'relallvisible', '%d'::integer\n);\n", @@ -10536,13 +10480,18 @@ appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo) * Append a series of formatted pg_restore_attribute_stats statements. */ static void -appendAttStatsImport(PQExpBuffer out, Archive *fout, PGresult *res) +appendAttStatsImport(PQExpBuffer out, Archive *fout, PGresult *res, + const char *qualified_name) { for (int rownum = 0; rownum < PQntuples(res); rownum++) { const char *sep = ""; appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n"); + appendPQExpBuffer(out, "\t'version', '%u'::integer,\n", + fout->remoteVersion); + appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n", + qualified_name); for (int argno = 0; argno < lengthof(att_stats_arginfo); argno++) { const char *argname = att_stats_arginfo[argno][0]; @@ -10607,6 +10556,7 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) DumpableObject *dobj = (DumpableObject *) &rsinfo->dobj; DumpId *deps = NULL; int ndeps = 0; + const char *qualified_name; /* nothing to do if we are not dumping statistics */ if (!fout->dopt->dumpStatistics) @@ -10622,15 +10572,56 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) tag = createPQExpBuffer(); appendPQExpBufferStr(tag, fmtId(dobj->name)); - out = createPQExpBuffer(); + query = createPQExpBuffer(); + if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS]) + { + appendPQExpBufferStr(query, + "PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n" + "SELECT s.attname, s.inherited, " + "s.null_frac, s.avg_width, s.n_distinct, " + "s.most_common_vals, s.most_common_freqs, " + "s.histogram_bounds, s.correlation, " + "s.most_common_elems, s.most_common_elem_freqs, " + "s.elem_count_histogram, "); + + if (fout->remoteVersion >= 170000) + appendPQExpBufferStr(query, + "s.range_length_histogram, s.range_empty_frac, " + "s.range_bounds_histogram "); + else + appendPQExpBufferStr(query, + "NULL AS range_length_histogram," + "NULL AS range_empty_frac," + "NULL AS range_bounds_histogram "); - appendRelStatsImport(out, fout, rsinfo); + appendPQExpBufferStr(query, + "FROM pg_stats s " + "WHERE s.schemaname = $1 " + "AND s.tablename = $2 " + "ORDER BY s.attname, s.inherited"); + + ExecuteSqlStatement(fout, query->data); + + fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS] = true; + resetPQExpBuffer(query); + } + + appendPQExpBufferStr(query, "EXECUTE getAttributeStats("); + appendStringLiteralAH(query, dobj->namespace->dobj.name, fout); + appendPQExpBufferStr(query, ", "); + appendStringLiteralAH(query, dobj->name, fout); + appendPQExpBufferStr(query, "); "); - query = createPQExpBuffer(); - getAttStatsExportQuery(query, fout, dobj->namespace->dobj.name, - dobj->name); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - appendAttStatsImport(out, fout, res); + + out = createPQExpBuffer(); + + qualified_name = fmtQualifiedId(rsinfo->dobj.namespace->dobj.name, + rsinfo->dobj.name); + + appendRelStatsImport(out, fout, rsinfo, qualified_name); + appendAttStatsImport(out, fout, res, qualified_name); + PQclear(res); ArchiveEntry(fout, nilCatalogId, createDumpId(), -- 2.34.1