The query optimizer currently does not consider reordering a query's
grouping columns. While the order in which ORDER BY columns are
specified affects the semantics of the query, AFAICS GROUP BY's column
order does not. Reordering a query's grouping columns would allow the
optimizer to avoid some unnecessary sorts; for example, given an index
on (a, b), we should be able to avoid a sort in this query:

SELECT a, b, max(c) FROM t1 GROUP BY b, a;

which the optimizer is currently incapable of doing.

I think fixing this properly would require teaching the planner that
certain PathKeys are unordered, so the planner can pick whichever order
is best. That looks like a fairly invasive change: the assumption that
PathKeyItems are ordered looks pretty widespread.

A simple hack might help with a subset of this problem, though. For
queries with both ORDER BY and GROUP BY clauses, we can sort the
grouping columns according to their position in the ORDER BY list. So,
given a query like:

SELECT a, b, max(c) FROM t1 GROUP BY a, b ORDER BY b;

We can avoid the redundant sort for the ORDER BY by grouping by (b, a)
instead. Attached is a proof-of-concept patch that implements this,
although it's an enormous kludge.

Thoughts?

-Neil

# 
# old_revision [7c6bab196365c3c324210ded9cea01038fd07207]
# 
# patch "src/backend/optimizer/path/pathkeys.c"
#  from [e6be522f2cec66b12a3cc2f3c5f4f51b52b6ab57]
#    to [c52e4a6a4408f81933273acecb7e0e2f59948585]
# 
# patch "src/backend/optimizer/plan/planmain.c"
#  from [379d9feab5bc2737dec63d52b3d75e1e7eb5bf30]
#    to [c884241d073161bc4eaeded7dda01a6ecd3639b7]
# 
# patch "src/backend/optimizer/plan/planner.c"
#  from [5a40208a0611916228cc08497bca0ae593555b3a]
#    to [94cab4ec5e862d671e80aee41f34864b415a5f5e]
# 
# patch "src/include/optimizer/paths.h"
#  from [076eea394b0c5bfb9a7fd159d39be0ace8481d32]
#    to [9615bb8f8006cde7ce48dfdb2ccab54feb344c17]
# 
============================================================
*** src/backend/optimizer/path/pathkeys.c	e6be522f2cec66b12a3cc2f3c5f4f51b52b6ab57
--- src/backend/optimizer/path/pathkeys.c	c52e4a6a4408f81933273acecb7e0e2f59948585
***************
*** 722,728 ****
--- 722,787 ----
  	return new_pathkeys;
  }
  
+ void
+ reorder_group_pathkeys(PlannerInfo *root)
+ {
+ 	ListCell   *group_pos;
+ 	ListCell   *lc;
  
+ 	if (root->sort_pathkeys == NIL || root->group_pathkeys == NIL)
+ 		return;
+ 
+ 	/* If the grouping list contains just a single column, we can't reorder */
+ 	if (list_length(root->group_pathkeys) == 1)
+ 		return;
+ 
+ 	group_pos = NULL;
+ 	foreach (lc, root->sort_pathkeys)
+ 	{
+ 		List		*order_by_key = lfirst(lc);
+ 		ListCell	*prev;
+ 		ListCell	*match;
+ 
+ 		/* Look for the current order_by_key in the grouping list */
+ 		prev = NULL;
+ 		if (group_pos)
+ 			match = lnext(group_pos);
+ 		else
+ 			match = list_head(root->group_pathkeys);
+ 
+ 		while (match)
+ 		{
+ 			List *key = (List *) lfirst(match);
+ 
+ 			if (key == order_by_key)
+ 			{
+ 				root->group_pathkeys = list_delete_cell(root->group_pathkeys,
+ 														match, prev);
+ 
+ 				if (group_pos == NULL)
+ 				{
+ 					root->group_pathkeys = lcons(key, root->group_pathkeys);
+ 					group_pos = list_head(root->group_pathkeys);
+ 				}
+ 				else
+ 				{
+ 					group_pos = lappend_cell(root->group_pathkeys,
+ 											 group_pos, key);
+ 				}
+ 
+ 				break;
+ 			}
+ 
+ 			prev = match;
+ 			match = lnext(match);
+ 		}
+ 
+ 		if (!match)
+ 			return;
+ 	}
+ }
+ 
+ 
  /*
   * count_canonical_peers
   *	  Given a PathKeyItem, find the equi_key_list subset it is a member of,
============================================================
*** src/backend/optimizer/plan/planmain.c	379d9feab5bc2737dec63d52b3d75e1e7eb5bf30
--- src/backend/optimizer/plan/planmain.c	c884241d073161bc4eaeded7dda01a6ecd3639b7
***************
*** 171,181 ****
  	 * Also canonicalize the groupClause and sortClause pathkeys for use
  	 * later.
  	 */
- 	root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
  	root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
  	root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
  
  	/*
  	 * Ready to do the primary planning.
  	 */
  	final_rel = make_one_rel(root, joinlist);
--- 171,198 ----
  	 * Also canonicalize the groupClause and sortClause pathkeys for use
  	 * later.
  	 */
  	root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
  	root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
  
  	/*
+ 	 * The order in which the GROUP BY columns are specified does not
+ 	 * affect the semantics of the query. Currently the planner does
+ 	 * not realize this: in general, it assumes the ordering of
+ 	 * PathKeys is important. As a quick hack, we reorder the grouping
+ 	 * columns to match their position in the ORDER BY list. This
+ 	 * allows us to avoid a redundant sort for queries like GROUP BY
+ 	 * a, b ORDER BY b.
+ 	 */
+ 	reorder_group_pathkeys(root);
+ 
+ 	if (root->group_pathkeys)
+ 		root->query_pathkeys = list_copy(root->group_pathkeys);
+ 	else if (root->sort_pathkeys)
+ 		root->query_pathkeys = list_copy(root->sort_pathkeys);
+ 	else
+ 		root->query_pathkeys = NIL;
+ 
+ 	/*
  	 * Ready to do the primary planning.
  	 */
  	final_rel = make_one_rel(root, joinlist);
============================================================
*** src/backend/optimizer/plan/planner.c	5a40208a0611916228cc08497bca0ae593555b3a
--- src/backend/optimizer/plan/planner.c	94cab4ec5e862d671e80aee41f34864b415a5f5e
***************
*** 754,759 ****
--- 754,760 ----
  			count_agg_clauses(parse->havingQual, &agg_counts);
  		}
  
+ #if 0
  		/*
  		 * Figure out whether we need a sorted result from query_planner.
  		 *
***************
*** 770,775 ****
--- 771,777 ----
  			root->query_pathkeys = root->sort_pathkeys;
  		else
  			root->query_pathkeys = NIL;
+ #endif
  
  		/*
  		 * Generate the best unsorted and presorted paths for this Query (but
============================================================
*** src/include/optimizer/paths.h	076eea394b0c5bfb9a7fd159d39be0ace8481d32
--- src/include/optimizer/paths.h	9615bb8f8006cde7ce48dfdb2ccab54feb344c17
***************
*** 106,111 ****
--- 106,112 ----
  extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2);
  extern void generate_implied_equalities(PlannerInfo *root);
  extern List *canonicalize_pathkeys(PlannerInfo *root, List *pathkeys);
+ extern void reorder_group_pathkeys(PlannerInfo *root);
  extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
  extern bool pathkeys_contained_in(List *keys1, List *keys2);
  extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

Reply via email to