On Thu, Nov 13, 2025 at 5:41 PM Álvaro Herrera <[email protected]> wrote:
>

>
> > @@ -15658,10 +15658,19 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid 
> > refRelId, char *cmd,
> >                       querytree_list = list_concat(querytree_list, 
> > afterStmts);
> >               }
> >               else if (IsA(stmt, CreateStatsStmt))
> > +             {
> > +                     RangeTblEntry   *rte;
> > +                     CreateStatsStmt *ss = castNode(CreateStatsStmt, stmt);
> > +
> > +                     rte = makeNode(RangeTblEntry);
> > +                     rte->rtekind = RTE_RELATION;
> > +                     rte->relid = oldRelId;
> > +                     rte->rellockmode = ShareUpdateExclusiveLock;
> > +                     ss->rtable = list_make1(rte);
> > +
> >                       querytree_list = lappend(querytree_list,
> > -                                                                      
> > transformStatsStmt(oldRelId,
> > -                                                                           
> >                                   (CreateStatsStmt *) stmt,
> > -                                                                           
> >                                   cmd));
> > +                                                                      
> > transformStatsStmt(ss, cmd));
> > +             }
> >               else
> >                       querytree_list = lappend(querytree_list, stmt);
> >       }
>
> Hmm, how would this part here work in the hypothetical world where a
> stats object references multiple relations?
>
hi.

For extended statistics that span multiple relations (hypothetically),
we should have the OIDs of
all involved relations within the pg_statistic_ext catalog. Without this , it's
hard to do statistic's expression deparse, changing column data types
or generation
expressions. Catalog pg_depend do have statistics associated relation OIDs
information, relying on it for statistical expression deparsing would be more
complicated.

Once pg_statistic_ext have all relation oid information,
RememberStatisticsForRebuilding will not only collecting
AlteredTableInfo->changedStatisticsOids, it will aslo collect the OID of other
relations associated with this statistic.

For the above quoted part, we should construct a RangeTblEntry for
each associated
relation (OID).
Obviously transformStatsStmt itself needs to figure out how to
deal with multi-relation expressions.

rebased and minor polishing.



--
jian
https://www.enterprisedb.com
From e9e4a0a85b3244da89e45ba7f258e57c109c3610 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Wed, 17 Dec 2025 13:44:46 +0800
Subject: [PATCH v6 1/1] Restructure CreateStatsStmt parse-analysis processing

Previously, a lot of code would be run in the ProcessUtility pipeline,
where it is not welcome (that's supposed to be just a giant dispatcher
switch).  Move the parse analysis code to CreateStatistics() instead,
which then means we don't need to open the relation twice; it also
allows us to give a better error message when something other than a
relation name is given in the FROM clause.

Generally we should avoid look up the same less-than-fully-qualified name
multiple times, we might get different answers due to concurrent activity, and
that might create a security vulnerability, along the lines of CVE-2014-0062.

Refactor so that a CreateStatsStmt carries both the untransformed FROM clause
and the range table that we get when it's transformed. That way, once the name
lookups have been done, we can propagate the results forward to future
processing steps and avoid ever repeating the lookup.

discussion: https://postgr.es/m/cacjufxgxlu400qbbgdoboaza0xk58rqqscaprxbamumo0f8...@mail.gmail.com
discussion: https://postgr.es/m/CA+TgmobyMXzoEzscCRDCggHRCTp1TW=dm9pehmwoykos43w...@mail.gmail.com
commitfest: https://commitfest.postgresql.org/patch/6106
---
 src/backend/commands/statscmds.c   |  65 ++-------------
 src/backend/commands/tablecmds.c   |  17 +++-
 src/backend/parser/gram.y          |   6 +-
 src/backend/parser/parse_utilcmd.c | 124 +++++++++++++++++++++++++++--
 src/backend/tcop/utility.c         |  29 +------
 src/include/commands/defrem.h      |   2 +-
 src/include/nodes/parsenodes.h     |   4 +
 src/include/parser/parse_utilcmd.h |   3 +-
 8 files changed, 149 insertions(+), 101 deletions(-)

diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 77b1a6e2dc5..e323190cb11 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -30,6 +30,7 @@
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
+#include "parser/parse_utilcmd.h"
 #include "statistics/statistics.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
@@ -60,7 +61,7 @@ compare_int16(const void *a, const void *b)
  *		CREATE STATISTICS
  */
 ObjectAddress
-CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
+CreateStatistics(CreateStatsStmt *stmt, bool check_rights, const char *queryString)
 {
 	int16		attnums[STATS_MAX_DIMENSIONS];
 	int			nattnums = 0;
@@ -78,6 +79,7 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 	Datum		exprsDatum;
 	Relation	statrel;
 	Relation	rel = NULL;
+	RangeTblEntry *rte;
 	Oid			relid;
 	ObjectAddress parentobject,
 				myself;
@@ -95,64 +97,13 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 
 	Assert(IsA(stmt, CreateStatsStmt));
 
-	/*
-	 * Examine the FROM clause.  Currently, we only allow it to be a single
-	 * simple table, but later we'll probably allow multiple tables and JOIN
-	 * syntax.  The grammar is already prepared for that, so we have to check
-	 * here that what we got is what we can support.
-	 */
-	if (list_length(stmt->relations) != 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("only a single relation is allowed in CREATE STATISTICS")));
+	/* Run parse analysis */
+	stmt = transformStatsStmt(stmt, queryString);
 
-	foreach(cell, stmt->relations)
-	{
-		Node	   *rln = (Node *) lfirst(cell);
+	Assert(list_length(stmt->rtable) == list_length(stmt->from_clause));
 
-		if (!IsA(rln, RangeVar))
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("only a single relation is allowed in CREATE STATISTICS")));
-
-		/*
-		 * CREATE STATISTICS will influence future execution plans but does
-		 * not interfere with currently executing plans.  So it should be
-		 * enough to take only ShareUpdateExclusiveLock on relation,
-		 * conflicting with ANALYZE and other DDL that sets statistical
-		 * information, but not with normal queries.
-		 */
-		rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock);
-
-		/* Restrict to allowed relation types */
-		if (rel->rd_rel->relkind != RELKIND_RELATION &&
-			rel->rd_rel->relkind != RELKIND_MATVIEW &&
-			rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
-			rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
-			ereport(ERROR,
-					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-					 errmsg("cannot define statistics for relation \"%s\"",
-							RelationGetRelationName(rel)),
-					 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
-
-		/*
-		 * You must own the relation to create stats on it.
-		 *
-		 * NB: Concurrent changes could cause this function's lookup to find a
-		 * different relation than a previous lookup by the caller, so we must
-		 * perform this check even when check_rights == false.
-		 */
-		if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), stxowner))
-			aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
-						   RelationGetRelationName(rel));
-
-		/* Creating statistics on system catalogs is not allowed */
-		if (!allowSystemTableMods && IsSystemRelation(rel))
-			ereport(ERROR,
-					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
-					 errmsg("permission denied: \"%s\" is a system catalog",
-							RelationGetRelationName(rel))));
-	}
+	rte = linitial_node(RangeTblEntry, stmt->rtable);
+	rel = relation_open(rte->relid, NoLock);
 
 	Assert(rel);
 	relid = RelationGetRelid(rel);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 6b1a00ed477..3fb9d9afe23 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -9718,7 +9718,7 @@ ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
 	/* The CreateStatsStmt has already been through transformStatsStmt */
 	Assert(stmt->transformed);
 
-	address = CreateStatistics(stmt, !is_rebuild);
+	address = CreateStatistics(stmt, !is_rebuild, NULL);
 
 	return address;
 }
@@ -15694,10 +15694,19 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
 			querytree_list = list_concat(querytree_list, afterStmts);
 		}
 		else if (IsA(stmt, CreateStatsStmt))
+		{
+			RangeTblEntry *rte;
+			CreateStatsStmt *ss = castNode(CreateStatsStmt, stmt);
+
+			rte = makeNode(RangeTblEntry);
+			rte->rtekind = RTE_RELATION;
+			rte->relid = oldRelId;
+			rte->rellockmode = ShareUpdateExclusiveLock;
+			ss->rtable = list_make1(rte);
+
 			querytree_list = lappend(querytree_list,
-									 transformStatsStmt(oldRelId,
-														(CreateStatsStmt *) stmt,
-														cmd));
+									 transformStatsStmt(ss, cmd));
+		}
 		else
 			querytree_list = lappend(querytree_list, stmt);
 	}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 28f4e11e30f..7a4ee97988d 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -4830,7 +4830,8 @@ CreateStatsStmt:
 					n->defnames = $3;
 					n->stat_types = $4;
 					n->exprs = $6;
-					n->relations = $8;
+					n->from_clause = $8;
+					n->rtable = NIL;
 					n->stxcomment = NULL;
 					n->if_not_exists = false;
 					$$ = (Node *) n;
@@ -4843,7 +4844,8 @@ CreateStatsStmt:
 					n->defnames = $6;
 					n->stat_types = $7;
 					n->exprs = $9;
-					n->relations = $11;
+					n->from_clause = $11;
+					n->rtable = NIL;
 					n->stxcomment = NULL;
 					n->if_not_exists = true;
 					$$ = (Node *) n;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 375b40b29af..0817c4e63f1 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -2052,6 +2052,7 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 	HeapTuple	ht_stats;
 	Form_pg_statistic_ext statsrec;
 	CreateStatsStmt *stats;
+	RangeTblEntry *rte;
 	List	   *stat_types = NIL;
 	List	   *def_names = NIL;
 	bool		isnull;
@@ -2155,7 +2156,15 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 	stats->defnames = NULL;
 	stats->stat_types = stat_types;
 	stats->exprs = def_names;
-	stats->relations = list_make1(heapRel);
+	stats->from_clause = list_make1(heapRel);
+
+	/* Create a range table entry. */
+	rte = makeNode(RangeTblEntry);
+	rte->rtekind = RTE_RELATION;
+	rte->relid = heapRelid;
+	rte->rellockmode = ShareUpdateExclusiveLock;
+	stats->rtable = list_make1(rte);
+
 	stats->stxcomment = NULL;
 	stats->transformed = true;	/* don't need transformStatsStmt again */
 	stats->if_not_exists = false;
@@ -3139,32 +3148,46 @@ transformIndexStmt(Oid relid, IndexStmt *stmt, const char *queryString)
 /*
  * transformStatsStmt - parse analysis for CREATE STATISTICS
  *
- * To avoid race conditions, it's important that this function relies only on
- * the passed-in relid (and not on stmt->relation) to determine the target
- * relation.
+ * This is used by CREATE STATISTICS. Changing table column’s data type or
+ * generated expression will trigger a rebuild of the related table’s
+ * statistics, during which this will also be used.
  */
 CreateStatsStmt *
-transformStatsStmt(Oid relid, CreateStatsStmt *stmt, const char *queryString)
+transformStatsStmt(CreateStatsStmt *stmt, const char *queryString)
 {
 	ParseState *pstate;
 	ParseNamespaceItem *nsitem;
 	ListCell   *l;
 	Relation	rel;
+	RangeTblEntry *rte;
 
 	/* Nothing to do if statement already transformed. */
 	if (stmt->transformed)
 		return stmt;
 
+	/* Construct range table if not done yet. */
+	if (stmt->rtable == NIL)
+		stmt->rtable = transformStatsFromClause(stmt->from_clause);
+
+	/*
+	 * The range table should have been derived from a FROM clause consisting
+	 * of a single RangeVar, so we expect a single RangeTblEntry.
+	 */
+	Assert(list_length(stmt->rtable) == 1);
+	rte = linitial(stmt->rtable);
+	Assert(IsA(rte, RangeTblEntry));
+
+	/* See transformStatsFromClause for notes on lock level. */
+	rel = table_open(rte->relid, NoLock);
+
 	/* Set up pstate */
 	pstate = make_parsestate(NULL);
 	pstate->p_sourcetext = queryString;
 
 	/*
 	 * Put the parent table into the rtable so that the expressions can refer
-	 * to its fields without qualification.  Caller is responsible for locking
-	 * relation, but we still need to open it.
+	 * to its fields without qualification.
 	 */
-	rel = relation_open(relid, NoLock);
 	nsitem = addRangeTableEntryForRelation(pstate, rel,
 										   AccessShareLock,
 										   NULL, false, true);
@@ -3208,6 +3231,91 @@ transformStatsStmt(Oid relid, CreateStatsStmt *stmt, const char *queryString)
 	return stmt;
 }
 
+/*
+ * Accepts a list of RangeVar objects and returns a corresponding list of
+ * RangeTblEntry objects. For use while transforming a CreateStatsStmt.
+ *
+ * At present, the only case we know how to handle is a FROM clause
+ * consisting of a single RangeVar.
+ */
+List *
+transformStatsFromClause(List *from_clause)
+{
+	RangeTblEntry *rte;
+	ListCell   *cell;
+	Relation	rel = NULL;
+
+	/*
+	 * Examine the FROM clause.  Currently, we only allow it to be a single
+	 * simple table, but later we'll probably allow multiple tables and JOIN
+	 * syntax.  The grammar is already prepared for that, so we have to check
+	 * here that what we got is what we can support.
+	 */
+	if (list_length(from_clause) != 1)
+		ereport(ERROR,
+				errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				errmsg("only a single relation is allowed in CREATE STATISTICS"));
+
+	foreach(cell, from_clause)
+	{
+		Node	   *rln = (Node *) lfirst(cell);
+
+		if (!IsA(rln, RangeVar))
+			ereport(ERROR,
+					errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					errmsg("CREATE STATISTICS only supports relation names in the FROM clause"));
+
+		/*
+		 * CREATE STATISTICS will influence future execution plans but does
+		 * not interfere with currently executing plans.  So it should be
+		 * enough to take only ShareUpdateExclusiveLock on relation,
+		 * conflicting with ANALYZE and other DDL that sets statistical
+		 * information, but not with normal queries.
+		 */
+		rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock);
+
+		/* Restrict to allowed relation types */
+		if (rel->rd_rel->relkind != RELKIND_RELATION &&
+			rel->rd_rel->relkind != RELKIND_MATVIEW &&
+			rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+			rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+			ereport(ERROR,
+					errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					errmsg("cannot define statistics for relation \"%s\"",
+						   RelationGetRelationName(rel)),
+					errdetail_relkind_not_supported(rel->rd_rel->relkind));
+
+		/*
+		 * You must own the relation to create stats on it.
+		 *
+		 * NB: Concurrent changes could cause this function's lookup to find a
+		 * different relation than a previous lookup by the caller, so we must
+		 * perform this check unconditionally.
+		 */
+		if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
+						   RelationGetRelationName(rel));
+
+		/* Creating statistics on system catalogs is not allowed */
+		if (!allowSystemTableMods && IsSystemRelation(rel))
+			ereport(ERROR,
+					errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					errmsg("permission denied: \"%s\" is a system catalog",
+						   RelationGetRelationName(rel)));
+	}
+
+	/* Create a range table entry. */
+	rte = makeNode(RangeTblEntry);
+	rte->rtekind = RTE_RELATION;
+	rte->relid = RelationGetRelid(rel);
+	rte->rellockmode = ShareUpdateExclusiveLock;
+
+	/* Close relation */
+	relation_close(rel, NoLock);
+
+	return list_make1(rte);
+}
+
 
 /*
  * transformRuleStmt -
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index d18a3a60a46..0c31113ebb5 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1874,34 +1874,7 @@ ProcessUtilitySlow(ParseState *pstate,
 				break;
 
 			case T_CreateStatsStmt:
-				{
-					Oid			relid;
-					CreateStatsStmt *stmt = (CreateStatsStmt *) parsetree;
-					RangeVar   *rel = (RangeVar *) linitial(stmt->relations);
-
-					if (!IsA(rel, RangeVar))
-						ereport(ERROR,
-								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-								 errmsg("CREATE STATISTICS only supports relation names in the FROM clause")));
-
-					/*
-					 * CREATE STATISTICS will influence future execution plans
-					 * but does not interfere with currently executing plans.
-					 * So it should be enough to take ShareUpdateExclusiveLock
-					 * on relation, conflicting with ANALYZE and other DDL
-					 * that sets statistical information, but not with normal
-					 * queries.
-					 *
-					 * XXX RangeVarCallbackOwnsRelation not needed here, to
-					 * keep the same behavior as before.
-					 */
-					relid = RangeVarGetRelid(rel, ShareUpdateExclusiveLock, false);
-
-					/* Run parse analysis ... */
-					stmt = transformStatsStmt(relid, stmt, queryString);
-
-					address = CreateStatistics(stmt, true);
-				}
+				address = CreateStatistics((CreateStatsStmt *) parsetree, true, queryString);
 				break;
 
 			case T_AlterStatsStmt:
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index f3432b4b6a1..95397f91c5a 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -85,7 +85,7 @@ extern void RemoveOperatorById(Oid operOid);
 extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
 
 /* commands/statscmds.c */
-extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt, bool check_rights);
+extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt, bool check_rights, const char *queryString);
 extern ObjectAddress AlterStatistics(AlterStatsStmt *stmt);
 extern void RemoveStatisticsById(Oid statsOid);
 extern void RemoveStatisticsDataById(Oid statsOid, bool inh);
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index bc7adba4a0f..326d30cfb2f 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -3551,6 +3551,10 @@ typedef struct CreateStatsStmt
 	List	   *stat_types;		/* stat types (list of String) */
 	List	   *exprs;			/* expressions to build statistics on */
 	List	   *relations;		/* rels to build stats on (list of RangeVar) */
+	List	   *from_clause;	/* FROM clause ((list of RangeVar)) */
+	List	   *rtable;			/* It’s not derived directly from the
+								 * parser, instead it comes from parse
+								 * analysis. (list of RangeTblEntry) */
 	char	   *stxcomment;		/* comment to apply to stats, or NULL */
 	bool		transformed;	/* true when transformStatsStmt is finished */
 	bool		if_not_exists;	/* do nothing if stats name already exists */
diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h
index 4965fac4495..499e9ebef1c 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -26,8 +26,9 @@ extern AlterTableStmt *transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
 											   List **afterStmts);
 extern IndexStmt *transformIndexStmt(Oid relid, IndexStmt *stmt,
 									 const char *queryString);
-extern CreateStatsStmt *transformStatsStmt(Oid relid, CreateStatsStmt *stmt,
+extern CreateStatsStmt *transformStatsStmt(CreateStatsStmt *stmt,
 										   const char *queryString);
+extern List *transformStatsFromClause(List *from_clause);
 extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
 							  List **actions, Node **whereClause);
 extern List *transformCreateSchemaStmtElements(List *schemaElts,
-- 
2.34.1

Reply via email to