Per recent discussion on -hackers, we should sometimes try to reorder
the columns of the grouping clause to avoid redundant sorts. The
optimizer is not currently capable of doing this, so this patch
implements a simple hack in the analysis phase (transformGroupClause):
if any subset of the GROUP BY clause matches a prefix of the ORDER BY
list, that prefix is moved to the front of the GROUP BY clause. This
shouldn't change the semantics of the query, and allows a redundant sort
to be avoided for queries like "GROUP BY a, b ORDER BY b".

One question about the implementation: to avoid redundant and
potentially expensive calls to findTargetlistEntry(), I constructed a
temporary list of TLEs. I think that's probably a good tradeoff, but
suggestions for improvement are welcome. Also, I released the list via
list_free() when finished with it -- is that a waste of cycles?

Barring any objections, I'll apply this to HEAD tomorrow.

-Neil

============================================================
*** src/backend/parser/parse_clause.c	1b13820b23dd57bd4e00ffbd2f979015b1536243
--- src/backend/parser/parse_clause.c	c974085c2dd4686f1112eb8a17750d41a7eebbd4
***************
*** 1303,1331 ****
   *
   * GROUP BY items will be added to the targetlist (as resjunk columns)
   * if not already present, so the targetlist must be passed by reference.
   */
  List *
  transformGroupClause(ParseState *pstate, List *grouplist,
  					 List **targetlist, List *sortClause)
  {
! 	List	   *glist = NIL;
! 	ListCell   *gl;
  	ListCell   *sortItem;
  
! 	sortItem = list_head(sortClause);
  
! 	foreach(gl, grouplist)
  	{
! 		TargetEntry *tle;
  		Oid			restype;
  		Oid			ordering_op;
  		GroupClause *grpcl;
  
- 		tle = findTargetlistEntry(pstate, lfirst(gl),
- 								  targetlist, GROUP_CLAUSE);
- 
  		/* avoid making duplicate grouplist entries */
! 		if (targetIsInSortList(tle, glist))
  			continue;
  
  		/* if tlist item is an UNKNOWN literal, change it to TEXT */
--- 1303,1388 ----
   *
   * GROUP BY items will be added to the targetlist (as resjunk columns)
   * if not already present, so the targetlist must be passed by reference.
+  *
+  * The order of the elements of the grouping clause does not affect
+  * the semantics of the query. However, the optimizer is not currently
+  * smart enough to reorder the grouping clause, so we try to do some
+  * primitive reordering here. Specifically, we check whether there is
+  * a subset of the GROUP BY clause that forms a prefix of the ORDER BY
+  * list. If there is, we move that prefix to the front of the grouping
+  * clause. This allows us to avoid redundant sorts for queries like
+  * "GROUP BY a, b ORDER BY b".
   */
  List *
  transformGroupClause(ParseState *pstate, List *grouplist,
  					 List **targetlist, List *sortClause)
  {
! 	List	   *result = NIL;
! 	List	   *tle_list = NIL;
! 	List	   *prefix = NIL;
! 	ListCell   *l;
  	ListCell   *sortItem;
  
! 	/*
! 	 * Produce a list of TargetEntries for the elements of the group
! 	 * clause.
! 	 */
! 	foreach (l, grouplist)
! 	{
! 		TargetEntry *tle = findTargetlistEntry(pstate, lfirst(l),
! 											   targetlist, GROUP_CLAUSE);
  
! 		tle_list = lappend(tle_list, tle);
! 	}
! 
! 	/*
! 	 * Search for a prefix of the ORDER BY clause in the GROUP BY
! 	 * clause, and move those TLEs to "prefix".
! 	 */
! 	foreach (l, sortClause)
  	{
! 		SortClause	*sc = (SortClause *) lfirst(l);
! 		ListCell	*prev = NULL;
! 		ListCell	*tl;
! 		bool		 found = false;
! 
! 		foreach (tl, tle_list)
! 		{
! 			TargetEntry *tle = (TargetEntry *) lfirst(tl);
! 
! 			if (sc->tleSortGroupRef == tle->ressortgroupref)
! 			{
! 				/* Found a match, so move it to the right list */
! 				tle_list = list_delete_cell(tle_list, tl, prev);
! 				prefix = lappend(prefix, tle);
! 				found = true;
! 				break;
! 			}
! 
! 			prev = tl;
! 		}
! 
! 		/*
! 		 * As soon as we've failed to match a single member of the
! 		 * ORDER BY list, stop: we're just looking for a prefix.
! 		 */
! 		if (!found)
! 			break;
! 	}
! 
! 	/* Prepend the prefix to the rest of the TLEs */
! 	tle_list = list_concat(prefix, tle_list);
! 
! 	sortItem = list_head(sortClause);
! 	foreach(l, tle_list)
! 	{
! 		TargetEntry *tle = (TargetEntry *) lfirst(l);
  		Oid			restype;
  		Oid			ordering_op;
  		GroupClause *grpcl;
  
  		/* avoid making duplicate grouplist entries */
! 		if (targetIsInSortList(tle, result))
  			continue;
  
  		/* if tlist item is an UNKNOWN literal, change it to TEXT */
***************
*** 1341,1352 ****
  		}
  
  		/*
! 		 * If the GROUP BY clause matches the ORDER BY clause, we want to
! 		 * adopt the ordering operators from the latter rather than using the
! 		 * default ops.  This allows "GROUP BY foo ORDER BY foo DESC" to be
! 		 * done with only one sort step.  Note we are assuming that any
! 		 * user-supplied ordering operator will bring equal values together,
! 		 * which is all that GROUP BY needs.
  		 */
  		if (sortItem &&
  			((SortClause *) lfirst(sortItem))->tleSortGroupRef ==
--- 1398,1411 ----
  		}
  
  		/*
! 		 * If the GROUP BY clause matches the ORDER BY clause, we want
! 		 * to adopt the ordering operators from the latter rather than
! 		 * using the default ops.  This allows "GROUP BY foo ORDER BY
! 		 * foo DESC" to be done with only one sort step.  Note we are
! 		 * assuming that any user-supplied ordering operator will
! 		 * bring equal values together, which is all that GROUP BY
! 		 * needs. Note that we've previously rearranged the grouping
! 		 * clause to help this process, as described above.
  		 */
  		if (sortItem &&
  			((SortClause *) lfirst(sortItem))->tleSortGroupRef ==
***************
*** 1364,1373 ****
  		grpcl = makeNode(GroupClause);
  		grpcl->tleSortGroupRef = assignSortGroupRef(tle, *targetlist);
  		grpcl->sortop = ordering_op;
! 		glist = lappend(glist, grpcl);
  	}
  
! 	return glist;
  }
  
  /*
--- 1423,1433 ----
  		grpcl = makeNode(GroupClause);
  		grpcl->tleSortGroupRef = assignSortGroupRef(tle, *targetlist);
  		grpcl->sortop = ordering_op;
! 		result = lappend(result, grpcl);
  	}
  
! 	list_free(tle_list);
! 	return result;
  }
  
  /*
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

Reply via email to