Per recent discussion on -hackers, we should sometimes try to reorder
the columns of the grouping clause to avoid redundant sorts. The
optimizer is not currently capable of doing this, so this patch
implements a simple hack in the analysis phase (transformGroupClause):
if any subset of the GROUP BY clause matches a prefix of the ORDER BY
list, that prefix is moved to the front of the GROUP BY clause. This
shouldn't change the semantics of the query, and allows a redundant sort
to be avoided for queries like GROUP BY a, b ORDER BY b.
One question about the implementation: to avoid redundant and
potentially expensive calls to findTargetlistEntry(), I constructed a
temporary list of TLEs. I think that's probably a good tradeoff, but
suggestions for improvement are welcome. Also, I released the list via
list_free() when finished with it -- is that a waste of cycles?
Barring any objections, I'll apply this to HEAD tomorrow.
-Neil
*** src/backend/parser/parse_clause.c 1b13820b23dd57bd4e00ffbd2f979015b1536243
--- src/backend/parser/parse_clause.c c974085c2dd4686f1112eb8a17750d41a7eebbd4
***
*** 1303,1331
*
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
*/
List *
transformGroupClause(ParseState *pstate, List *grouplist,
List **targetlist, List *sortClause)
{
! List *glist = NIL;
! ListCell *gl;
ListCell *sortItem;
! sortItem = list_head(sortClause);
! foreach(gl, grouplist)
{
! TargetEntry *tle;
Oid restype;
Oid ordering_op;
GroupClause *grpcl;
- tle = findTargetlistEntry(pstate, lfirst(gl),
- targetlist, GROUP_CLAUSE);
-
/* avoid making duplicate grouplist entries */
! if (targetIsInSortList(tle, glist))
continue;
/* if tlist item is an UNKNOWN literal, change it to TEXT */
--- 1303,1388
*
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
+ *
+ * The order of the elements of the grouping clause does not affect
+ * the semantics of the query. However, the optimizer is not currently
+ * smart enough to reorder the grouping clause, so we try to do some
+ * primitive reordering here. Specifically, we check whether there is
+ * a subset of the GROUP BY clause that forms a prefix of the ORDER BY
+ * list. If there is, we move that prefix to the front of the grouping
+ * clause. This allows us to avoid redundant sorts for queries like
+ * GROUP BY a, b ORDER BY b.
*/
List *
transformGroupClause(ParseState *pstate, List *grouplist,
List **targetlist, List *sortClause)
{
! List *result = NIL;
! List *tle_list = NIL;
! List *prefix = NIL;
! ListCell *l;
ListCell *sortItem;
! /*
! * Produce a list of TargetEntries for the elements of the group
! * clause.
! */
! foreach (l, grouplist)
! {
! TargetEntry *tle = findTargetlistEntry(pstate, lfirst(l),
! targetlist, GROUP_CLAUSE);
! tle_list = lappend(tle_list, tle);
! }
!
! /*
! * Search for a prefix of the ORDER BY clause in the GROUP BY
! * clause, and move those TLEs to prefix.
! */
! foreach (l, sortClause)
{
! SortClause *sc = (SortClause *) lfirst(l);
! ListCell *prev = NULL;
! ListCell *tl;
! bool found = false;
!
! foreach (tl, tle_list)
! {
! TargetEntry *tle = (TargetEntry *) lfirst(tl);
!
! if (sc-tleSortGroupRef == tle-ressortgroupref)
! {
! /* Found a match, so move it to the right list */
! tle_list = list_delete_cell(tle_list, tl, prev);
! prefix = lappend(prefix, tle);
! found = true;
! break;
! }
!
! prev = tl;
! }
!
! /*
! * As soon as we've failed to match a single member of the
! * ORDER BY list, stop: we're just looking for a prefix.
! */
! if (!found)
! break;
! }
!
! /* Prepend the prefix to the rest of the TLEs */
! tle_list = list_concat(prefix, tle_list);
!
! sortItem = list_head(sortClause);
! foreach(l, tle_list)
! {
! TargetEntry *tle = (TargetEntry *) lfirst(l);
Oid restype;
Oid ordering_op;
GroupClause *grpcl;
/* avoid making duplicate grouplist entries */
! if (targetIsInSortList(tle, result))
continue;
/* if tlist item is an UNKNOWN literal, change it to TEXT */
***
*** 1341,1352
}
/*
! * If the GROUP BY clause matches the ORDER BY clause, we want to
! * adopt the ordering operators from the latter rather than using the
! * default ops. This allows GROUP BY foo ORDER BY foo DESC to be
! * done with only one sort step. Note we are assuming that any
! * user-supplied ordering operator will bring equal values together,
! * which is all that GROUP BY needs.
*/
if (sortItem
((SortClause *)