I wrote:
> The oversight here is that we don't use appendrel planning for
> a top-level UNION ALL construct.  That didn't use to matter,
> because you always got the same stupid Append plan either way.
> Now it seems like we ought to have some more intelligence for the
> top-level SetOp case.  I smell some code refactoring coming up.

I did some hacking on this and came up with the attached patch, which
could use a bit more work on the comments but passes regression tests.
However, this just solves the issue of being smart about top-level
UNION ALL cases.  It might be worth looking into using MergeAppend
for the sorting required for other types of set operations.  That would
involve quite a different patch, and I'm not sure if it'd remove the
need for this one or not.

                        regards, tom lane

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 07301c77fbf4f2550367ea8ff58c685462835223..a1055978869c5072db6e3a172169d934062ae2aa 100644
*** a/src/backend/optimizer/plan/planner.c
--- b/src/backend/optimizer/plan/planner.c
*************** subquery_planner(PlannerGlobal *glob, Qu
*** 341,353 ****
  	inline_set_returning_functions(root);
  
  	/*
! 	 * Check to see if any subqueries in the rangetable can be merged into
  	 * this query.
  	 */
  	parse->jointree = (FromExpr *)
  		pull_up_subqueries(root, (Node *) parse->jointree, NULL, NULL);
  
  	/*
  	 * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
  	 * avoid the expense of doing flatten_join_alias_vars().  Also check for
  	 * outer joins --- if none, we can skip reduce_outer_joins(). This must be
--- 341,362 ----
  	inline_set_returning_functions(root);
  
  	/*
! 	 * Check to see if any subqueries in the jointree can be merged into
  	 * this query.
  	 */
  	parse->jointree = (FromExpr *)
  		pull_up_subqueries(root, (Node *) parse->jointree, NULL, NULL);
  
  	/*
+ 	 * If this is a simple UNION ALL query, flatten it into an appendrel.
+ 	 * We do this now because it requires applying pull_up_subqueries to the
+ 	 * leaf queries of the UNION ALL, which weren't touched above because they
+ 	 * aren't referenced by the jointree until we do this.
+ 	 */
+ 	if (parse->setOperations)
+ 		flatten_union_all(root);
+ 
+ 	/*
  	 * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
  	 * avoid the expense of doing flatten_join_alias_vars().  Also check for
  	 * outer joins --- if none, we can skip reduce_outer_joins(). This must be
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index e337751328bf0b05e3025a2ecc302f754b766855..847e67959a8b81a5cf8ea55ae48d57ec98f32285 100644
*** a/src/backend/optimizer/prep/prepjointree.c
--- b/src/backend/optimizer/prep/prepjointree.c
***************
*** 7,12 ****
--- 7,13 ----
   *		pull_up_sublinks
   *		inline_set_returning_functions
   *		pull_up_subqueries
+  *		flatten_union_all
   *		do expression preprocessing (including flattening JOIN alias vars)
   *		reduce_outer_joins
   *
*************** pull_up_simple_union_all(PlannerInfo *ro
*** 869,879 ****
  	List	   *rtable;
  
  	/*
- 	 * Append the subquery rtable entries to upper query.
- 	 */
- 	rtoffset = list_length(root->parse->rtable);
- 
- 	/*
  	 * Append child RTEs to parent rtable.
  	 *
  	 * Upper-level vars in subquery are now one level closer to their parent
--- 870,875 ----
*************** pull_up_simple_union_all(PlannerInfo *ro
*** 881,886 ****
--- 877,883 ----
  	 * because any such vars must refer to stuff above the level of the query
  	 * we are pulling into.
  	 */
+ 	rtoffset = list_length(root->parse->rtable);
  	rtable = copyObject(subquery->rtable);
  	IncrementVarSublevelsUp_rtable(rtable, -1, 1);
  	root->parse->rtable = list_concat(root->parse->rtable, rtable);
*************** pullup_replace_vars_callback(Var *var,
*** 1418,1423 ****
--- 1415,1518 ----
  	return newnode;
  }
  
+ 
+ /*
+  * flatten_union_all
+  *		Try to optimize a top-level UNION ALL structure
+  *
+  * If a query's setOperations tree consists entirely of simple UNION ALL
+  * operations, flatten it into an append relation, which we can process more
+  * intelligently than the general set-ops case.
+  *
+  * In most cases, this can succeed only for a top-level query, because for a
+  * subquery in FROM, the parent query's invocation of pull_up_subqueries would
+  * already have flattened the UNION via pull_up_simple_union_all.  But there
+  * are a few cases we can support here but not in that code path, for example
+  * when the subquery also contains ORDER BY.
+  */
+ void
+ flatten_union_all(PlannerInfo *root)
+ {
+ 	Query	   *parse = root->parse;
+ 	SetOperationStmt *topop;
+ 	Node	   *leftmostjtnode;
+ 	int			leftmostRTI;
+ 	RangeTblEntry *leftmostRTE;
+ 	int			childRTI;
+ 	RangeTblEntry *childRTE;
+ 	RangeTblRef *rtr;
+ 
+ 	/* Shouldn't be called unless query has setops */
+ 	topop = (SetOperationStmt *) parse->setOperations;
+ 	Assert(topop && IsA(topop, SetOperationStmt));
+ 
+ 	/* Can't optimize away a recursive UNION */
+ 	if (root->hasRecursion)
+ 		return;
+ 
+ 	/*
+ 	 * Recursively check the tree of set operations.  If not all UNION ALL
+ 	 * with identical column types, punt.
+ 	 */
+ 	if (!is_simple_union_all_recurse((Node *) topop, parse,
+ 									 topop->colTypes))
+ 		return;
+ 
+ 	/*
+ 	 * Locate the leftmost leaf query in the setops tree.  The upper query's
+ 	 * Vars all refer to this RTE (see transformSetOperationStmt).
+ 	 */
+ 	leftmostjtnode = topop->larg;
+ 	while (leftmostjtnode && IsA(leftmostjtnode, SetOperationStmt))
+ 		leftmostjtnode = ((SetOperationStmt *) leftmostjtnode)->larg;
+ 	Assert(leftmostjtnode && IsA(leftmostjtnode, RangeTblRef));
+ 	leftmostRTI = ((RangeTblRef *) leftmostjtnode)->rtindex;
+ 	leftmostRTE = rt_fetch(leftmostRTI, parse->rtable);
+ 	Assert(leftmostRTE->rtekind == RTE_SUBQUERY);
+ 
+ 	/*
+ 	 * Make a copy of the leftmost RTE and add it to the rtable.  This copy
+ 	 * will represent the leftmost leaf query in its capacity as a member
+ 	 * of the appendrel.  The original will represent the appendrel as a
+ 	 * whole.  (We must do things this way because the upper query's Vars
+ 	 * have to be seen as referring to the whole appendrel.)
+ 	 */
+ 	childRTE = copyObject(leftmostRTE);
+ 	parse->rtable = lappend(parse->rtable, childRTE);
+ 	childRTI = list_length(parse->rtable);
+ 
+ 	/* Modify the setops tree to reference the child copy */
+ 	((RangeTblRef *) leftmostjtnode)->rtindex = childRTI;
+ 
+ 	/*
+ 	 * Modify the formerly-leftmost RTE to mark it as an appendrel parent
+ 	 * (compare pull_up_simple_union_all).
+ 	 */
+ 	leftmostRTE->inh = true;
+ 
+ 	/*
+ 	 * Form a RangeTblRef for the appendrel, and insert it into FROM.  The top
+ 	 * Query of a setops tree should have had an empty FromClause initially.
+ 	 */
+ 	rtr = makeNode(RangeTblRef);
+ 	rtr->rtindex = leftmostRTI;
+ 	Assert(parse->jointree->fromlist == NIL);
+ 	parse->jointree->fromlist = list_make1(rtr);
+ 
+ 	/*
+ 	 * Now pretend the query has no setops.  We must do this before trying
+ 	 * to do subquery pullup, because of Assert in pull_up_simple_subquery.
+ 	 */
+ 	parse->setOperations = NULL;
+ 
+ 	/*
+ 	 * Build AppendRelInfo information and try to pull up the leaf queries.
+ 	 * XXX need to adjust comments for pull_up_union_leaf_queries
+ 	 */
+ 	pull_up_union_leaf_queries((Node *) topop, root, leftmostRTI, parse, 0);
+ }
+ 
+ 
  /*
   * reduce_outer_joins
   *		Attempt to reduce outer joins to plain inner joins.
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
index f8dd5428ee47a74637d37f5a293d2acff117eae4..e4654d9eeab0ef3908f679652866adb2589bd1ce 100644
*** a/src/include/optimizer/prep.h
--- b/src/include/optimizer/prep.h
*************** extern void inline_set_returning_functio
*** 26,31 ****
--- 26,32 ----
  extern Node *pull_up_subqueries(PlannerInfo *root, Node *jtnode,
  				   JoinExpr *lowest_outer_join,
  				   AppendRelInfo *containing_appendrel);
+ extern void flatten_union_all(PlannerInfo *root);
  extern void reduce_outer_joins(PlannerInfo *root);
  extern Relids get_relids_in_jointree(Node *jtnode, bool include_joins);
  extern Relids get_relids_for_join(PlannerInfo *root, int joinrelid);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to