Per recent discussion on -hackers, we should sometimes try to reorder the columns of the grouping clause to avoid redundant sorts. The optimizer is not currently capable of doing this, so this patch implements a simple hack in the analysis phase (transformGroupClause): if any subset of the GROUP BY clause matches a prefix of the ORDER BY list, that prefix is moved to the front of the GROUP BY clause. This shouldn't change the semantics of the query, and allows a redundant sort to be avoided for queries like "GROUP BY a, b ORDER BY b".
One question about the implementation: to avoid redundant and potentially expensive calls to findTargetlistEntry(), I constructed a temporary list of TLEs. I think that's probably a good tradeoff, but suggestions for improvement are welcome. Also, I released the list via list_free() when finished with it -- is that a waste of cycles? Barring any objections, I'll apply this to HEAD tomorrow. -Neil
============================================================ *** src/backend/parser/parse_clause.c 1b13820b23dd57bd4e00ffbd2f979015b1536243 --- src/backend/parser/parse_clause.c c974085c2dd4686f1112eb8a17750d41a7eebbd4 *************** *** 1303,1331 **** * * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. */ List * transformGroupClause(ParseState *pstate, List *grouplist, List **targetlist, List *sortClause) { ! List *glist = NIL; ! ListCell *gl; ListCell *sortItem; ! sortItem = list_head(sortClause); ! foreach(gl, grouplist) { ! TargetEntry *tle; Oid restype; Oid ordering_op; GroupClause *grpcl; - tle = findTargetlistEntry(pstate, lfirst(gl), - targetlist, GROUP_CLAUSE); - /* avoid making duplicate grouplist entries */ ! if (targetIsInSortList(tle, glist)) continue; /* if tlist item is an UNKNOWN literal, change it to TEXT */ --- 1303,1388 ---- * * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. + * + * The order of the elements of the grouping clause does not affect + * the semantics of the query. However, the optimizer is not currently + * smart enough to reorder the grouping clause, so we try to do some + * primitive reordering here. Specifically, we check whether there is + * a subset of the GROUP BY clause that forms a prefix of the ORDER BY + * list. If there is, we move that prefix to the front of the grouping + * clause. This allows us to avoid redundant sorts for queries like + * "GROUP BY a, b ORDER BY b". */ List * transformGroupClause(ParseState *pstate, List *grouplist, List **targetlist, List *sortClause) { ! List *result = NIL; ! List *tle_list = NIL; ! List *prefix = NIL; ! ListCell *l; ListCell *sortItem; ! /* ! * Produce a list of TargetEntries for the elements of the group ! * clause. ! */ ! foreach (l, grouplist) ! { ! TargetEntry *tle = findTargetlistEntry(pstate, lfirst(l), ! targetlist, GROUP_CLAUSE); ! tle_list = lappend(tle_list, tle); ! } ! ! /* ! * Search for a prefix of the ORDER BY clause in the GROUP BY ! * clause, and move those TLEs to "prefix". ! */ ! foreach (l, sortClause) { ! SortClause *sc = (SortClause *) lfirst(l); ! ListCell *prev = NULL; ! ListCell *tl; ! bool found = false; ! ! foreach (tl, tle_list) ! { ! TargetEntry *tle = (TargetEntry *) lfirst(tl); ! ! if (sc->tleSortGroupRef == tle->ressortgroupref) ! { ! /* Found a match, so move it to the right list */ ! tle_list = list_delete_cell(tle_list, tl, prev); ! prefix = lappend(prefix, tle); ! found = true; ! break; ! } ! ! prev = tl; ! } ! ! /* ! * As soon as we've failed to match a single member of the ! * ORDER BY list, stop: we're just looking for a prefix. ! */ ! if (!found) ! break; ! } ! ! /* Prepend the prefix to the rest of the TLEs */ ! tle_list = list_concat(prefix, tle_list); ! ! sortItem = list_head(sortClause); ! foreach(l, tle_list) ! { ! TargetEntry *tle = (TargetEntry *) lfirst(l); Oid restype; Oid ordering_op; GroupClause *grpcl; /* avoid making duplicate grouplist entries */ ! if (targetIsInSortList(tle, result)) continue; /* if tlist item is an UNKNOWN literal, change it to TEXT */ *************** *** 1341,1352 **** } /* ! * If the GROUP BY clause matches the ORDER BY clause, we want to ! * adopt the ordering operators from the latter rather than using the ! * default ops. This allows "GROUP BY foo ORDER BY foo DESC" to be ! * done with only one sort step. Note we are assuming that any ! * user-supplied ordering operator will bring equal values together, ! * which is all that GROUP BY needs. */ if (sortItem && ((SortClause *) lfirst(sortItem))->tleSortGroupRef == --- 1398,1411 ---- } /* ! * If the GROUP BY clause matches the ORDER BY clause, we want ! * to adopt the ordering operators from the latter rather than ! * using the default ops. This allows "GROUP BY foo ORDER BY ! * foo DESC" to be done with only one sort step. Note we are ! * assuming that any user-supplied ordering operator will ! * bring equal values together, which is all that GROUP BY ! * needs. Note that we've previously rearranged the grouping ! * clause to help this process, as described above. */ if (sortItem && ((SortClause *) lfirst(sortItem))->tleSortGroupRef == *************** *** 1364,1373 **** grpcl = makeNode(GroupClause); grpcl->tleSortGroupRef = assignSortGroupRef(tle, *targetlist); grpcl->sortop = ordering_op; ! glist = lappend(glist, grpcl); } ! return glist; } /* --- 1423,1433 ---- grpcl = makeNode(GroupClause); grpcl->tleSortGroupRef = assignSortGroupRef(tle, *targetlist); grpcl->sortop = ordering_op; ! result = lappend(result, grpcl); } ! list_free(tle_list); ! return result; } /*
---------------------------(end of broadcast)--------------------------- TIP 2: Don't 'kill -9' the postmaster