On Mon, 2025-02-24 at 09:54 -0500, Andres Freund wrote:
> Have you compared performance of with/without stats after these
> optimizations?
On unoptimized build with asserts enabled, dumping the regression
database:
--no-statistics: 1.0s
master: 3.6s
v3j-0001: 3.0s
v3j-0002: 1.7s
I plan to commit the patches soon.
Regards,
Jeff Davis
From d617fb142158e0ca964e5bc8bb3351d993de6062 Mon Sep 17 00:00:00 2001
From: Corey Huinker <[email protected]>
Date: Fri, 21 Feb 2025 23:31:04 -0500
Subject: [PATCH v3j 1/2] Avoid unnecessary relation stats query in pg_dump.
The few fields we need can be easily collected in getTables() and
getIndexes() and stored in RelStatsInfo.
Co-authored-by: Corey Huinker <[email protected]>
Co-authored-by: Jeff Davis <[email protected]>
Discussion: https://postgr.es/m/CADkLM=f0a43atd88xw4xcfayef25g-7htrhx_whv40hyocs...@mail.gmail.com
---
src/bin/pg_dump/pg_dump.c | 145 ++++++++++++++++----------------------
src/bin/pg_dump/pg_dump.h | 5 +-
2 files changed, 64 insertions(+), 86 deletions(-)
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index afd79287177..a1823914656 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -56,6 +56,7 @@
#include "common/connect.h"
#include "common/int.h"
#include "common/relpath.h"
+#include "common/shortest_dec.h"
#include "compress_io.h"
#include "dumputils.h"
#include "fe_utils/option_utils.h"
@@ -524,6 +525,9 @@ main(int argc, char **argv)
pg_logging_set_level(PG_LOG_WARNING);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_dump"));
+ /* ensure that locale does not affect floating point interpretation */
+ setlocale(LC_NUMERIC, "C");
+
/*
* Initialize what we need for parallel execution, especially for thread
* support on Windows.
@@ -6814,7 +6818,8 @@ getFuncs(Archive *fout)
*
*/
static RelStatsInfo *
-getRelationStatistics(Archive *fout, DumpableObject *rel, char relkind)
+getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages,
+ float reltuples, int32 relallvisible, char relkind)
{
if (!fout->dopt->dumpStatistics)
return NULL;
@@ -6839,6 +6844,9 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, char relkind)
dobj->components |= DUMP_COMPONENT_STATISTICS;
dobj->name = pg_strdup(rel->name);
dobj->namespace = rel->namespace;
+ info->relpages = relpages;
+ info->reltuples = reltuples;
+ info->relallvisible = relallvisible;
info->relkind = relkind;
info->postponed_def = false;
@@ -6874,6 +6882,8 @@ getTables(Archive *fout, int *numTables)
int i_relhasindex;
int i_relhasrules;
int i_relpages;
+ int i_reltuples;
+ int i_relallvisible;
int i_toastpages;
int i_owning_tab;
int i_owning_col;
@@ -6924,7 +6934,7 @@ getTables(Archive *fout, int *numTables)
"c.relowner, "
"c.relchecks, "
"c.relhasindex, c.relhasrules, c.relpages, "
- "c.relhastriggers, "
+ "c.reltuples, c.relallvisible, c.relhastriggers, "
"c.relpersistence, "
"c.reloftype, "
"c.relacl, "
@@ -7088,6 +7098,8 @@ getTables(Archive *fout, int *numTables)
i_relhasindex = PQfnumber(res, "relhasindex");
i_relhasrules = PQfnumber(res, "relhasrules");
i_relpages = PQfnumber(res, "relpages");
+ i_reltuples = PQfnumber(res, "reltuples");
+ i_relallvisible = PQfnumber(res, "relallvisible");
i_toastpages = PQfnumber(res, "toastpages");
i_owning_tab = PQfnumber(res, "owning_tab");
i_owning_col = PQfnumber(res, "owning_col");
@@ -7134,6 +7146,9 @@ getTables(Archive *fout, int *numTables)
for (i = 0; i < ntups; i++)
{
+ float reltuples = strtof(PQgetvalue(res, i, i_reltuples), NULL);
+ int32 relallvisible = atoi(PQgetvalue(res, i, i_relallvisible));
+
tblinfo[i].dobj.objType = DO_TABLE;
tblinfo[i].dobj.catId.tableoid = atooid(PQgetvalue(res, i, i_reltableoid));
tblinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_reloid));
@@ -7233,7 +7248,8 @@ getTables(Archive *fout, int *numTables)
/* Add statistics */
if (tblinfo[i].interesting)
- getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relkind);
+ getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relpages,
+ reltuples, relallvisible, tblinfo[i].relkind);
/*
* Read-lock target tables to make sure they aren't DROPPED or altered
@@ -7499,6 +7515,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
i_oid,
i_indrelid,
i_indexname,
+ i_relpages,
+ i_reltuples,
+ i_relallvisible,
i_parentidx,
i_indexdef,
i_indnkeyatts,
@@ -7552,6 +7571,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
appendPQExpBufferStr(query,
"SELECT t.tableoid, t.oid, i.indrelid, "
"t.relname AS indexname, "
+ "t.relpages, t.reltuples, t.relallvisible, "
"pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, "
"i.indkey, i.indisclustered, "
"c.contype, c.conname, "
@@ -7659,6 +7679,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
i_oid = PQfnumber(res, "oid");
i_indrelid = PQfnumber(res, "indrelid");
i_indexname = PQfnumber(res, "indexname");
+ i_relpages = PQfnumber(res, "relpages");
+ i_reltuples = PQfnumber(res, "reltuples");
+ i_relallvisible = PQfnumber(res, "relallvisible");
i_parentidx = PQfnumber(res, "parentidx");
i_indexdef = PQfnumber(res, "indexdef");
i_indnkeyatts = PQfnumber(res, "indnkeyatts");
@@ -7725,6 +7748,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
char contype;
char indexkind;
RelStatsInfo *relstats;
+ int32 relpages = atoi(PQgetvalue(res, j, i_relpages));
+ float reltuples = strtof(PQgetvalue(res, j, i_reltuples), NULL);
+ int32 relallvisible = atoi(PQgetvalue(res, j, i_relallvisible));
indxinfo[j].dobj.objType = DO_INDEX;
indxinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid));
@@ -7759,7 +7785,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
indexkind = RELKIND_PARTITIONED_INDEX;
contype = *(PQgetvalue(res, j, i_contype));
- relstats = getRelationStatistics(fout, &indxinfo[j].dobj, indexkind);
+ relstats = getRelationStatistics(fout, &indxinfo[j].dobj, relpages,
+ reltuples, relallvisible, indexkind);
if (contype == 'p' || contype == 'u' || contype == 'x')
{
@@ -10383,18 +10410,6 @@ dumpComment(Archive *fout, const char *type,
catalogId, subid, dumpId, NULL);
}
-/*
- * Tabular description of the parameters to pg_restore_relation_stats()
- * param_name, param_type
- */
-static const char *rel_stats_arginfo[][2] = {
- {"relation", "regclass"},
- {"version", "integer"},
- {"relpages", "integer"},
- {"reltuples", "real"},
- {"relallvisible", "integer"},
-};
-
/*
* Tabular description of the parameters to pg_restore_attribute_stats()
* param_name, param_type
@@ -10419,30 +10434,6 @@ static const char *att_stats_arginfo[][2] = {
{"range_bounds_histogram", "text"},
};
-/*
- * getRelStatsExportQuery --
- *
- * Generate a query that will fetch all relation (e.g. pg_class)
- * stats for a given relation.
- */
-static void
-getRelStatsExportQuery(PQExpBuffer query, Archive *fout,
- const char *schemaname, const char *relname)
-{
- resetPQExpBuffer(query);
- appendPQExpBufferStr(query,
- "SELECT c.oid::regclass AS relation, "
- "current_setting('server_version_num') AS version, "
- "c.relpages, c.reltuples, c.relallvisible "
- "FROM pg_class c "
- "JOIN pg_namespace n "
- "ON n.oid = c.relnamespace "
- "WHERE n.nspname = ");
- appendStringLiteralAH(query, schemaname, fout);
- appendPQExpBufferStr(query, " AND c.relname = ");
- appendStringLiteralAH(query, relname, fout);
-}
-
/*
* getAttStatsExportQuery --
*
@@ -10454,21 +10445,22 @@ getAttStatsExportQuery(PQExpBuffer query, Archive *fout,
const char *schemaname, const char *relname)
{
resetPQExpBuffer(query);
- appendPQExpBufferStr(query,
- "SELECT c.oid::regclass AS relation, "
- "s.attname,"
- "s.inherited,"
- "current_setting('server_version_num') AS version, "
- "s.null_frac,"
- "s.avg_width,"
- "s.n_distinct,"
- "s.most_common_vals,"
- "s.most_common_freqs,"
- "s.histogram_bounds,"
- "s.correlation,"
- "s.most_common_elems,"
- "s.most_common_elem_freqs,"
- "s.elem_count_histogram,");
+ appendPQExpBuffer(query,
+ "SELECT c.oid::regclass AS relation, "
+ "s.attname,"
+ "s.inherited,"
+ "'%u'::integer AS version, "
+ "s.null_frac,"
+ "s.avg_width,"
+ "s.n_distinct,"
+ "s.most_common_vals,"
+ "s.most_common_freqs,"
+ "s.histogram_bounds,"
+ "s.correlation,"
+ "s.most_common_elems,"
+ "s.most_common_elem_freqs,"
+ "s.elem_count_histogram,",
+ fout->remoteVersion);
if (fout->remoteVersion >= 170000)
appendPQExpBufferStr(query,
@@ -10521,34 +10513,21 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
* Append a formatted pg_restore_relation_stats statement.
*/
static void
-appendRelStatsImport(PQExpBuffer out, Archive *fout, PGresult *res)
+appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo)
{
- const char *sep = "";
+ const char *qualname = fmtQualifiedId(rsinfo->dobj.namespace->dobj.name, rsinfo->dobj.name);
+ char reltuples_str[FLOAT_SHORTEST_DECIMAL_LEN];
- if (PQntuples(res) == 0)
- return;
+ float_to_shortest_decimal_buf(rsinfo->reltuples, reltuples_str);
appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n");
-
- for (int argno = 0; argno < lengthof(rel_stats_arginfo); argno++)
- {
- const char *argname = rel_stats_arginfo[argno][0];
- const char *argtype = rel_stats_arginfo[argno][1];
- int fieldno = PQfnumber(res, argname);
-
- if (fieldno < 0)
- pg_fatal("relation stats export query missing field '%s'",
- argname);
-
- if (PQgetisnull(res, 0, fieldno))
- continue;
-
- appendPQExpBufferStr(out, sep);
- appendNamedArgument(out, fout, argname, PQgetvalue(res, 0, fieldno), argtype);
-
- sep = ",\n";
- }
- appendPQExpBufferStr(out, "\n);\n");
+ appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n", qualname);
+ appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
+ fout->remoteVersion);
+ appendPQExpBuffer(out, "\t'relpages', '%d'::integer,\n", rsinfo->relpages);
+ appendPQExpBuffer(out, "\t'reltuples', '%s'::real,\n", reltuples_str);
+ appendPQExpBuffer(out, "\t'relallvisible', '%d'::integer\n);\n",
+ rsinfo->relallvisible);
}
/*
@@ -10643,15 +10622,11 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
tag = createPQExpBuffer();
appendPQExpBufferStr(tag, fmtId(dobj->name));
- query = createPQExpBuffer();
out = createPQExpBuffer();
- getRelStatsExportQuery(query, fout, dobj->namespace->dobj.name,
- dobj->name);
- res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
- appendRelStatsImport(out, fout, res);
- PQclear(res);
+ appendRelStatsImport(out, fout, rsinfo);
+ query = createPQExpBuffer();
getAttStatsExportQuery(query, fout, dobj->namespace->dobj.name,
dobj->name);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index f08f5905aa3..9d6a4857c4b 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -328,7 +328,7 @@ typedef struct _tableInfo
Oid owning_tab; /* OID of table owning sequence */
int owning_col; /* attr # of column owning sequence */
bool is_identity_sequence;
- int relpages; /* table's size in pages (from pg_class) */
+ int32 relpages; /* table's size in pages (from pg_class) */
int toastpages; /* toast table's size in pages, if any */
bool interesting; /* true if need to collect more data */
@@ -438,6 +438,9 @@ typedef struct _indexAttachInfo
typedef struct _relStatsInfo
{
DumpableObject dobj;
+ int32 relpages;
+ float reltuples;
+ int32 relallvisible;
char relkind; /* 'r', 'm', 'i', etc */
bool postponed_def; /* stats must be postponed into post-data */
} RelStatsInfo;
--
2.34.1
From 33cde5fdbb207a99bce678015e1e45df0210bb56 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Tue, 25 Feb 2025 17:15:47 -0800
Subject: [PATCH v3j 2/2] pg_dump: prepare attribute stats query.
Follow precedent in pg_dump for preparing queries to improve
performance. Also, simplify the query by removing unnecessary joins.
Reported-by: Andres Freund <[email protected]>
Co-authored-by: Corey Huinker <[email protected]>
Co-authored-by: Jeff Davis <[email protected]>
Discussion: https://postgr.es/m/CADkLM=drmc6t8gp9gvf6y6e_r5echqjmaah_vpyih_zmiq0...@mail.gmail.com
---
src/bin/pg_dump/pg_backup.h | 1 +
src/bin/pg_dump/pg_dump.c | 123 +++++++++++++++++-------------------
2 files changed, 58 insertions(+), 66 deletions(-)
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 350cf659c41..e783cc68d89 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -72,6 +72,7 @@ enum _dumpPreparedQueries
PREPQUERY_DUMPOPR,
PREPQUERY_DUMPRANGETYPE,
PREPQUERY_DUMPTABLEATTACH,
+ PREPQUERY_GETATTRIBUTESTATS,
PREPQUERY_GETCOLUMNACLS,
PREPQUERY_GETDOMAINCONSTRAINTS,
};
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index a1823914656..0de6c959bb0 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -10415,10 +10415,8 @@ dumpComment(Archive *fout, const char *type,
* param_name, param_type
*/
static const char *att_stats_arginfo[][2] = {
- {"relation", "regclass"},
{"attname", "name"},
{"inherited", "boolean"},
- {"version", "integer"},
{"null_frac", "float4"},
{"avg_width", "integer"},
{"n_distinct", "float4"},
@@ -10434,60 +10432,6 @@ static const char *att_stats_arginfo[][2] = {
{"range_bounds_histogram", "text"},
};
-/*
- * getAttStatsExportQuery --
- *
- * Generate a query that will fetch all attribute (e.g. pg_statistic)
- * stats for a given relation.
- */
-static void
-getAttStatsExportQuery(PQExpBuffer query, Archive *fout,
- const char *schemaname, const char *relname)
-{
- resetPQExpBuffer(query);
- appendPQExpBuffer(query,
- "SELECT c.oid::regclass AS relation, "
- "s.attname,"
- "s.inherited,"
- "'%u'::integer AS version, "
- "s.null_frac,"
- "s.avg_width,"
- "s.n_distinct,"
- "s.most_common_vals,"
- "s.most_common_freqs,"
- "s.histogram_bounds,"
- "s.correlation,"
- "s.most_common_elems,"
- "s.most_common_elem_freqs,"
- "s.elem_count_histogram,",
- fout->remoteVersion);
-
- if (fout->remoteVersion >= 170000)
- appendPQExpBufferStr(query,
- "s.range_length_histogram,"
- "s.range_empty_frac,"
- "s.range_bounds_histogram ");
- else
- appendPQExpBufferStr(query,
- "NULL AS range_length_histogram,"
- "NULL AS range_empty_frac,"
- "NULL AS range_bounds_histogram ");
-
- appendPQExpBufferStr(query,
- "FROM pg_stats s "
- "JOIN pg_namespace n "
- "ON n.nspname = s.schemaname "
- "JOIN pg_class c "
- "ON c.relname = s.tablename "
- "AND c.relnamespace = n.oid "
- "WHERE s.schemaname = ");
- appendStringLiteralAH(query, schemaname, fout);
- appendPQExpBufferStr(query, " AND s.tablename = ");
- appendStringLiteralAH(query, relname, fout);
- appendPQExpBufferStr(query, " ORDER BY s.attname, s.inherited");
-}
-
-
/*
* appendNamedArgument --
*
@@ -10513,17 +10457,17 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
* Append a formatted pg_restore_relation_stats statement.
*/
static void
-appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo)
+appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo,
+ const char *qualified_name)
{
- const char *qualname = fmtQualifiedId(rsinfo->dobj.namespace->dobj.name, rsinfo->dobj.name);
char reltuples_str[FLOAT_SHORTEST_DECIMAL_LEN];
float_to_shortest_decimal_buf(rsinfo->reltuples, reltuples_str);
appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n");
- appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n", qualname);
appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
fout->remoteVersion);
+ appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n", qualified_name);
appendPQExpBuffer(out, "\t'relpages', '%d'::integer,\n", rsinfo->relpages);
appendPQExpBuffer(out, "\t'reltuples', '%s'::real,\n", reltuples_str);
appendPQExpBuffer(out, "\t'relallvisible', '%d'::integer\n);\n",
@@ -10536,13 +10480,18 @@ appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo)
* Append a series of formatted pg_restore_attribute_stats statements.
*/
static void
-appendAttStatsImport(PQExpBuffer out, Archive *fout, PGresult *res)
+appendAttStatsImport(PQExpBuffer out, Archive *fout, PGresult *res,
+ const char *qualified_name)
{
for (int rownum = 0; rownum < PQntuples(res); rownum++)
{
const char *sep = "";
appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n");
+ appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
+ fout->remoteVersion);
+ appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n",
+ qualified_name);
for (int argno = 0; argno < lengthof(att_stats_arginfo); argno++)
{
const char *argname = att_stats_arginfo[argno][0];
@@ -10607,6 +10556,7 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
DumpableObject *dobj = (DumpableObject *) &rsinfo->dobj;
DumpId *deps = NULL;
int ndeps = 0;
+ const char *qualified_name;
/* nothing to do if we are not dumping statistics */
if (!fout->dopt->dumpStatistics)
@@ -10622,15 +10572,56 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
tag = createPQExpBuffer();
appendPQExpBufferStr(tag, fmtId(dobj->name));
- out = createPQExpBuffer();
+ query = createPQExpBuffer();
+ if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
+ {
+ appendPQExpBufferStr(query,
+ "PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n"
+ "SELECT s.attname, s.inherited, "
+ "s.null_frac, s.avg_width, s.n_distinct, "
+ "s.most_common_vals, s.most_common_freqs, "
+ "s.histogram_bounds, s.correlation, "
+ "s.most_common_elems, s.most_common_elem_freqs, "
+ "s.elem_count_histogram, ");
+
+ if (fout->remoteVersion >= 170000)
+ appendPQExpBufferStr(query,
+ "s.range_length_histogram, s.range_empty_frac, "
+ "s.range_bounds_histogram ");
+ else
+ appendPQExpBufferStr(query,
+ "NULL AS range_length_histogram,"
+ "NULL AS range_empty_frac,"
+ "NULL AS range_bounds_histogram ");
- appendRelStatsImport(out, fout, rsinfo);
+ appendPQExpBufferStr(query,
+ "FROM pg_stats s "
+ "WHERE s.schemaname = $1 "
+ "AND s.tablename = $2 "
+ "ORDER BY s.attname, s.inherited");
+
+ ExecuteSqlStatement(fout, query->data);
+
+ fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS] = true;
+ resetPQExpBuffer(query);
+ }
+
+ appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
+ appendStringLiteralAH(query, dobj->namespace->dobj.name, fout);
+ appendPQExpBufferStr(query, ", ");
+ appendStringLiteralAH(query, dobj->name, fout);
+ appendPQExpBufferStr(query, "); ");
- query = createPQExpBuffer();
- getAttStatsExportQuery(query, fout, dobj->namespace->dobj.name,
- dobj->name);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
- appendAttStatsImport(out, fout, res);
+
+ out = createPQExpBuffer();
+
+ qualified_name = fmtQualifiedId(rsinfo->dobj.namespace->dobj.name,
+ rsinfo->dobj.name);
+
+ appendRelStatsImport(out, fout, rsinfo, qualified_name);
+ appendAttStatsImport(out, fout, res, qualified_name);
+
PQclear(res);
ArchiveEntry(fout, nilCatalogId, createDumpId(),
--
2.34.1