I wrote:
> This patch is aimed at being smarter about cases where we have
> redundant GROUP BY entries, for example
> SELECT ... WHERE a.x = b.y GROUP BY a.x, b.y;
The cfbot didn't like this, because of a variable that wasn't
used in non-assert builds. Fixed in v2.
regards, tom lane
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 9524765650..450f724c49 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -3667,6 +3667,13 @@ appendGroupByClause(List *tlist, deparse_expr_cxt *context)
*/
Assert(!query->groupingSets);
+ /*
+ * We intentionally print query->groupClause not processed_groupClause,
+ * leaving it to the remote planner to get rid of any redundant GROUP BY
+ * items again. This is necessary in case processed_groupClause reduced
+ * to empty, and in any case the redundancy situation on the remote might
+ * be different than what we think here.
+ */
foreach(lc, query->groupClause)
{
SortGroupClause *grp = (SortGroupClause *) lfirst(lc);
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 1a2c2a665c..befb2fd4d3 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2864,16 +2864,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
-- GROUP BY clause in various forms, cardinal, alias and constant expression
explain (verbose, costs off)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
- QUERY PLAN
----------------------------------------------------------------------------------------
- Sort
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
Output: (count(c2)), c2, 5, 7.0, 9
- Sort Key: ft1.c2
- -> Foreign Scan
- Output: (count(c2)), c2, 5, 7.0, 9
- Relations: Aggregate on (public.ft1)
- Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+ Relations: Aggregate on (public.ft1)
+ Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
w | x | y | z
@@ -2990,14 +2987,13 @@ select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
QUERY PLAN
---------------------------------------------------
GroupAggregate
- Output: ($0), sum(ft1.c1)
- Group Key: $0
+ Output: $0, sum(ft1.c1)
InitPlan 1 (returns $0)
-> Seq Scan on pg_catalog.pg_enum
-> Foreign Scan on public.ft1
- Output: $0, ft1.c1
+ Output: ft1.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1"
-(8 rows)
+(7 rows)
select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
exists | sum
@@ -3382,14 +3378,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
--------------------------------------------------------------------------------------------------
GroupAggregate
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
- Group Key: ft2.c2
-> Sort
- Output: c2, c1
+ Output: c1, c2
Sort Key: ft2.c1 USING <^
-> Foreign Scan on public.ft2
- Output: c2, c1
+ Output: c1, c2
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(9 rows)
+(8 rows)
-- This should not be pushed either.
explain (verbose, costs off)
@@ -3458,14 +3453,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
--------------------------------------------------------------------------------------------------
GroupAggregate
Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
- Group Key: ft2.c2
-> Sort
- Output: c2, c1
+ Output: c1, c2
Sort Key: ft2.c1 USING <^
-> Foreign Scan on public.ft2
- Output: c2, c1
+ Output: c1, c2
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(9 rows)
+(8 rows)
-- Cleanup
drop operator class my_op_class using btree;
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index b9268e32dd..722ebc932b 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -3345,9 +3345,9 @@ estimate_path_cost_size(PlannerInfo *root,
}
/* Get number of grouping columns and possible number of groups */
- numGroupCols = list_length(root->parse->groupClause);
+ numGroupCols = list_length(root->processed_groupClause);
numGroups = estimate_num_groups(root,
- get_sortgrouplist_exprs(root->parse->groupClause,
+ get_sortgrouplist_exprs(root->processed_groupClause,
fpinfo->grouped_tlist),
input_rows, NULL, NULL);
@@ -3636,7 +3636,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root,
* pathkeys, adjust the costs with that function. Otherwise, adjust the
* costs by applying the same heuristic as for the scan or join case.
*/
- if (!grouping_is_sortable(root->parse->groupClause) ||
+ if (!grouping_is_sortable(root->processed_groupClause) ||
!pathkeys_contained_in(pathkeys, root->group_pathkeys))
{
Path sort_path; /* dummy for result of cost_sort */
@@ -6202,7 +6202,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
ListCell *l;
- /* Check whether this expression is part of GROUP BY clause */
+ /*
+ * Check whether this expression is part of GROUP BY clause. Note we
+ * check the whole GROUP BY clause not just processed_groupClause,
+ * because we will ship all of it, cf. appendGroupByClause.
+ */
if (sgref && get_sortgroupref_clause_noerr(sgref, query->groupClause))
{
TargetEntry *tle;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 30c9143183..a8a5cb6ef8 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2476,7 +2476,7 @@ agg_retrieve_direct(AggState *aggstate)
* If we are grouping, check whether we've crossed a group
* boundary.
*/
- if (node->aggstrategy != AGG_PLAIN)
+ if (node->aggstrategy != AGG_PLAIN && node->numCols > 0)
{
tmpcontext->ecxt_innertuple = firstSlot;
if (!ExecQual(aggstate->phase->eqfunctions[node->numCols - 1],
@@ -3480,8 +3480,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
*/
if (aggnode->aggstrategy == AGG_SORTED)
{
- Assert(aggnode->numCols > 0);
-
/*
* Build a separate function for each subset of columns that
* need to be compared.
@@ -3507,7 +3505,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
}
/* and for all grouped columns, unless already computed */
- if (phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
+ if (aggnode->numCols > 0 &&
+ phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
{
phasedata->eqfunctions[aggnode->numCols - 1] =
execTuplesMatchPrepare(scanDesc,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index a9943cd6e0..bb11dbec7b 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1152,18 +1152,62 @@ List *
make_pathkeys_for_sortclauses(PlannerInfo *root,
List *sortclauses,
List *tlist)
+{
+ List *result;
+ bool sortable;
+
+ result = make_pathkeys_for_sortclauses_extended(root,
+ &sortclauses,
+ tlist,
+ false,
+ &sortable);
+ /* It's caller error if not all clauses were sortable */
+ Assert(sortable);
+ return result;
+}
+
+/*
+ * make_pathkeys_for_sortclauses_extended
+ * Generate a pathkeys list that represents the sort order specified
+ * by a list of SortGroupClauses
+ *
+ * The comments for make_pathkeys_for_sortclauses apply here too. In addition:
+ *
+ * If remove_redundant is true, then any sort clauses that are found to
+ * give rise to redundant pathkeys are removed from the sortclauses list
+ * (which therefore must be pass-by-reference in this version).
+ *
+ * *sortable is set to true if all the sort clauses are in fact sortable.
+ * If any are not, they are ignored except for setting *sortable false.
+ * (In that case, the output pathkey list isn't really useful. However,
+ * we process the whole sortclauses list anyway, because it's still valid
+ * to remove any clauses that can be proven redundant via the eclass logic.
+ * Even though we'll have to hash in that case, we might as well not hash
+ * redundant columns.)
+ */
+List *
+make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
+ List **sortclauses,
+ List *tlist,
+ bool remove_redundant,
+ bool *sortable)
{
List *pathkeys = NIL;
ListCell *l;
- foreach(l, sortclauses)
+ *sortable = true;
+ foreach(l, *sortclauses)
{
SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
Expr *sortkey;
PathKey *pathkey;
sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
- Assert(OidIsValid(sortcl->sortop));
+ if (!OidIsValid(sortcl->sortop))
+ {
+ *sortable = false;
+ continue;
+ }
pathkey = make_pathkey_from_sortop(root,
sortkey,
root->nullable_baserels,
@@ -1175,6 +1219,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
/* Canonical form eliminates redundant ordering keys */
if (!pathkey_is_redundant(pathkey, pathkeys))
pathkeys = lappend(pathkeys, pathkey);
+ else if (remove_redundant)
+ *sortclauses = foreach_delete_current(*sortclauses, l);
}
return pathkeys;
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 66139928e8..a4c6062b41 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -2404,7 +2404,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
* sizing.
*/
maxref = 0;
- foreach(lc, root->parse->groupClause)
+ foreach(lc, root->processed_groupClause)
{
SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
@@ -2415,7 +2415,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
grouping_map = (AttrNumber *) palloc0((maxref + 1) * sizeof(AttrNumber));
/* Now look up the column numbers in the child's tlist */
- foreach(lc, root->parse->groupClause)
+ foreach(lc, root->processed_groupClause)
{
SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
TargetEntry *tle = get_sortgroupclause_tle(gc, subplan->targetlist);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 8a41e1e6d3..bcc10942cc 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -95,17 +95,9 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL;
#define EXPRKIND_TABLEFUNC 11
#define EXPRKIND_TABLEFUNC_LATERAL 12
-/* Passthrough data for standard_qp_callback */
-typedef struct
-{
- List *activeWindows; /* active windows, if any */
- List *groupClause; /* overrides parse->groupClause */
-} standard_qp_extra;
-
/*
* Data specific to grouping sets
*/
-
typedef struct
{
List *rollups;
@@ -129,6 +121,13 @@ typedef struct
* clauses per Window */
} WindowClauseSortData;
+/* Passthrough data for standard_qp_callback */
+typedef struct
+{
+ List *activeWindows; /* active windows, if any */
+ grouping_sets_data *gset_data; /* grouping sets data, if any */
+} standard_qp_extra;
+
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -636,6 +635,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->rowMarks = NIL;
memset(root->upper_rels, 0, sizeof(root->upper_rels));
memset(root->upper_targets, 0, sizeof(root->upper_targets));
+ root->processed_groupClause = NIL;
root->processed_tlist = NIL;
root->update_colnos = NIL;
root->grouping_map = NULL;
@@ -1032,9 +1032,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
}
parse->havingQual = (Node *) newHaving;
- /* Remove any redundant GROUP BY columns */
- remove_useless_groupby_columns(root);
-
/*
* If we have any outer joins, try to reduce them to plain inner joins.
* This step is most easily done after we've done expression
@@ -1393,11 +1390,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
{
gset_data = preprocess_grouping_sets(root);
}
- else
+ else if (parse->groupClause)
{
/* Preprocess regular GROUP BY clause, if any */
- if (parse->groupClause)
- parse->groupClause = preprocess_groupclause(root, NIL);
+ root->processed_groupClause = preprocess_groupclause(root, NIL);
+ /* Remove any redundant GROUP BY columns */
+ remove_useless_groupby_columns(root);
}
/*
@@ -1475,9 +1473,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* Set up data needed by standard_qp_callback */
qp_extra.activeWindows = activeWindows;
- qp_extra.groupClause = (gset_data
- ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
- : parse->groupClause);
+ qp_extra.gset_data = gset_data;
/*
* Generate the best unsorted and presorted paths for the scan/join
@@ -2011,6 +2007,12 @@ preprocess_grouping_sets(PlannerInfo *root)
gd->unsortable_refs = NULL;
gd->unsortable_sets = NIL;
+ /*
+ * We don't currently make any attempt to optimize the groupClause when
+ * there are grouping sets, so just duplicate it in processed_groupClause.
+ */
+ root->processed_groupClause = parse->groupClause;
+
if (parse->groupClause)
{
ListCell *lc;
@@ -2638,7 +2640,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
int relid;
/* No chance to do anything if there are less than two GROUP BY items */
- if (list_length(parse->groupClause) < 2)
+ if (list_length(root->processed_groupClause) < 2)
return;
/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
@@ -2652,7 +2654,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
*/
groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
(list_length(parse->rtable) + 1));
- foreach(lc, parse->groupClause)
+ foreach(lc, root->processed_groupClause)
{
SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2747,7 +2749,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
{
List *new_groupby = NIL;
- foreach(lc, parse->groupClause)
+ foreach(lc, root->processed_groupClause)
{
SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2764,7 +2766,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
new_groupby = lappend(new_groupby, sgc);
}
- parse->groupClause = new_groupby;
+ root->processed_groupClause = new_groupby;
}
}
@@ -2784,6 +2786,10 @@ remove_useless_groupby_columns(PlannerInfo *root)
* Note: we need no comparable processing of the distinctClause because
* the parser already enforced that that matches ORDER BY.
*
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause. This is important
+ * because later processing may modify the processed_groupClause list.
+ *
* For grouping sets, the order of items is instead forced to agree with that
* of the grouping set (and items not in the grouping set are skipped). The
* work of sorting the order of grouping set elements to match the ORDER BY if
@@ -2814,7 +2820,7 @@ preprocess_groupclause(PlannerInfo *root, List *force)
/* If no ORDER BY, nothing useful to do here */
if (parse->sortClause == NIL)
- return parse->groupClause;
+ return list_copy(parse->groupClause);
/*
* Scan the ORDER BY clause and construct a list of matching GROUP BY
@@ -2845,7 +2851,7 @@ preprocess_groupclause(PlannerInfo *root, List *force)
/* If no match at all, no point in reordering GROUP BY */
if (new_groupclause == NIL)
- return parse->groupClause;
+ return list_copy(parse->groupClause);
/*
* Add any remaining GROUP BY items to the new list, but only if we were
@@ -2861,10 +2867,10 @@ preprocess_groupclause(PlannerInfo *root, List *force)
if (list_member_ptr(new_groupclause, gc))
continue; /* it matched an ORDER BY item */
- if (partial_match)
- return parse->groupClause; /* give up, no common sort possible */
- if (!OidIsValid(gc->sortop))
- return parse->groupClause; /* give up, GROUP BY can't be sorted */
+ if (partial_match) /* give up, no common sort possible */
+ return list_copy(parse->groupClause);
+ if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
+ return list_copy(parse->groupClause);
new_groupclause = lappend(new_groupclause, gc);
}
@@ -3148,64 +3154,39 @@ reorder_grouping_sets(List *groupingSets, List *sortclause)
}
/*
- * make_pathkeys_for_groupagg
- * Determine the pathkeys for the GROUP BY clause and/or any ordered
- * aggregates. We expect at least one of these here.
+ * adjust_group_pathkeys_for_groupagg
+ * Add pathkeys to root->group_pathkeys to reflect the best set of
+ * pre-ordered input for ordered aggregates.
*
- * Building the pathkeys for the GROUP BY is simple. Most of the complexity
- * involved here comes from calculating the best pathkeys for ordered
- * aggregates. We define "best" as the pathkeys that suit the most number of
+ * We define "best" as the pathkeys that suit the largest number of
* aggregate functions. We find these by looking at the first ORDER BY /
* DISTINCT aggregate and take the pathkeys for that before searching for
* other aggregates that require the same or a more strict variation of the
* same pathkeys. We then repeat that process for any remaining aggregates
* with different pathkeys and if we find another set of pathkeys that suits a
- * larger number of aggregates then we return those pathkeys instead.
- *
- * *number_groupby_pathkeys gets set to the number of elements in the returned
- * list that belong to the GROUP BY clause. Any elements above this number
- * must belong to ORDER BY / DISTINCT aggregates.
+ * larger number of aggregates then we select those pathkeys instead.
*
* When the best pathkeys are found we also mark each Aggref that can use
* those pathkeys as aggpresorted = true.
*/
-static List *
-make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
- int *number_groupby_pathkeys)
+static void
+adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
{
- List *grouppathkeys = NIL;
+ List *grouppathkeys = root->group_pathkeys;
List *bestpathkeys;
Bitmapset *bestaggs;
Bitmapset *unprocessed_aggs;
ListCell *lc;
int i;
- Assert(groupClause != NIL || root->numOrderedAggs > 0);
-
- if (groupClause != NIL)
- {
- /* no pathkeys possible if there's an unsortable GROUP BY */
- if (!grouping_is_sortable(groupClause))
- {
- *number_groupby_pathkeys = 0;
- return NIL;
- }
-
- grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
- tlist);
- *number_groupby_pathkeys = list_length(grouppathkeys);
- }
- else
- *number_groupby_pathkeys = 0;
+ /* Shouldn't be here if there are grouping sets */
+ Assert(root->parse->groupingSets == NIL);
+ /* Shouldn't be here unless there are some ordered aggregates */
+ Assert(root->numOrderedAggs > 0);
- /*
- * We can't add pathkeys for ordered aggregates if there are any grouping
- * sets. All handling specific to ordered aggregates must be done by the
- * executor in that case.
- */
- if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL ||
- !enable_presorted_aggregate)
- return grouppathkeys;
+ /* Do nothing if disabled */
+ if (!enable_presorted_aggregate)
+ return;
/*
* Make a first pass over all AggInfos to collect a Bitmapset containing
@@ -3327,6 +3308,14 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
}
}
+ /*
+ * If we found any ordered aggregates, update root->group_pathkeys to add
+ * the best set of aggregate pathkeys. Note that bestpathkeys includes
+ * the original GROUP BY pathkeys already.
+ */
+ if (bestpathkeys != NIL)
+ root->group_pathkeys = bestpathkeys;
+
/*
* Now that we've found the best set of aggregates we can set the
* presorted flag to indicate to the executor that it needn't bother
@@ -3347,16 +3336,6 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
aggref->aggpresorted = true;
}
}
-
- /*
- * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
- * aggregates, then return bestpathkeys, otherwise return the
- * grouppathkeys.
- */
- if (bestpathkeys != NIL)
- return bestpathkeys;
-
- return grouppathkeys;
}
/*
@@ -3374,11 +3353,62 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* Calculate pathkeys that represent grouping/ordering and/or ordered
* aggregate requirements.
*/
- if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
- root->group_pathkeys = make_pathkeys_for_groupagg(root,
- qp_extra->groupClause,
- tlist,
- &root->num_groupby_pathkeys);
+ if (qp_extra->gset_data)
+ {
+ /*
+ * With grouping sets, just use the first RollupData's groupClause. We
+ * don't make any effort to optimize grouping clauses when there are
+ * grouping sets, nor can we combine aggregate ordering keys with
+ * grouping.
+ */
+ List *rollups = qp_extra->gset_data->rollups;
+ List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
+
+ if (grouping_is_sortable(groupClause))
+ {
+ root->group_pathkeys = make_pathkeys_for_sortclauses(root,
+ groupClause,
+ tlist);
+ root->num_groupby_pathkeys = list_length(root->group_pathkeys);
+ }
+ else
+ {
+ root->group_pathkeys = NIL;
+ root->num_groupby_pathkeys = 0;
+ }
+ }
+ else if (parse->groupClause || root->numOrderedAggs > 0)
+ {
+ /*
+ * With a plain GROUP BY list, we can remove any grouping items that
+ * are proven redundant by EquivalenceClass processing. For example,
+ * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
+ * especially common cases, but they're nearly free to detect. Note
+ * that we remove redundant items from processed_groupClause but not
+ * the original parse->groupClause.
+ */
+ bool sortable;
+
+ root->group_pathkeys =
+ make_pathkeys_for_sortclauses_extended(root,
+ &root->processed_groupClause,
+ tlist,
+ true,
+ &sortable);
+ if (!sortable)
+ {
+ /* Can't sort; no point in considering aggregate ordering either */
+ root->group_pathkeys = NIL;
+ root->num_groupby_pathkeys = 0;
+ }
+ else
+ {
+ root->num_groupby_pathkeys = list_length(root->group_pathkeys);
+ /* If we have ordered aggs, consider adding onto group_pathkeys */
+ if (root->numOrderedAggs > 0)
+ adjust_group_pathkeys_for_groupagg(root);
+ }
+ }
else
{
root->group_pathkeys = NIL;
@@ -3531,8 +3561,8 @@ get_number_of_groups(PlannerInfo *root,
}
else
{
- /* Plain GROUP BY */
- groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+ /* Plain GROUP BY -- estimate based on optimized groupClause */
+ groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
target_list);
dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
@@ -3610,8 +3640,8 @@ create_grouping_paths(PlannerInfo *root,
/*
* Determine whether it's possible to perform sort-based
- * implementations of grouping. (Note that if groupClause is empty,
- * grouping_is_sortable() is trivially true, and all the
+ * implementations of grouping. (Note that if processed_groupClause
+ * is empty, grouping_is_sortable() is trivially true, and all the
* pathkeys_contained_in() tests will succeed too, so that we'll
* consider every surviving input path.)
*
@@ -3620,7 +3650,7 @@ create_grouping_paths(PlannerInfo *root,
* must consider any sorted-input plan.
*/
if ((gd && gd->rollups != NIL)
- || grouping_is_sortable(parse->groupClause))
+ || grouping_is_sortable(root->processed_groupClause))
flags |= GROUPING_CAN_USE_SORT;
/*
@@ -3645,7 +3675,7 @@ create_grouping_paths(PlannerInfo *root,
*/
if ((parse->groupClause != NIL &&
root->numOrderedAggs == 0 &&
- (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
+ (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
flags |= GROUPING_CAN_USE_HASH;
/*
@@ -3856,6 +3886,9 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
* partial partitionwise aggregation. But if partial aggregation is
* not supported in general then we can't use it for partitionwise
* aggregation either.
+ *
+ * Check parse->groupClause not processed_groupClause, because it's
+ * okay if some of the partitioning columns were proved redundant.
*/
if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
group_by_has_partkey(input_rel, extra->targetList,
@@ -5214,8 +5247,9 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
Expr *expr = (Expr *) lfirst(lc);
Index sgref = get_pathtarget_sortgroupref(final_target, i);
- if (sgref && parse->groupClause &&
- get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+ if (sgref && root->processed_groupClause &&
+ get_sortgroupref_clause_noerr(sgref,
+ root->processed_groupClause) != NULL)
{
/*
* It's a grouping column, so add it to the input target as-is.
@@ -5283,7 +5317,6 @@ make_partial_grouping_target(PlannerInfo *root,
PathTarget *grouping_target,
Node *havingQual)
{
- Query *parse = root->parse;
PathTarget *partial_target;
List *non_group_cols;
List *non_group_exprs;
@@ -5299,8 +5332,9 @@ make_partial_grouping_target(PlannerInfo *root,
Expr *expr = (Expr *) lfirst(lc);
Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
- if (sgref && parse->groupClause &&
- get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+ if (sgref && root->processed_groupClause &&
+ get_sortgroupref_clause_noerr(sgref,
+ root->processed_groupClause) != NULL)
{
/*
* It's a grouping column, so add it to the partial_target as-is.
@@ -5747,7 +5781,6 @@ make_window_input_target(PlannerInfo *root,
PathTarget *final_target,
List *activeWindows)
{
- Query *parse = root->parse;
PathTarget *input_target;
Bitmapset *sgrefs;
List *flattenable_cols;
@@ -5755,7 +5788,7 @@ make_window_input_target(PlannerInfo *root,
int i;
ListCell *lc;
- Assert(parse->hasWindowFuncs);
+ Assert(root->parse->hasWindowFuncs);
/*
* Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
@@ -5782,7 +5815,7 @@ make_window_input_target(PlannerInfo *root,
}
/* Add in sortgroupref numbers of GROUP BY clauses, too */
- foreach(lc, parse->groupClause)
+ foreach(lc, root->processed_groupClause)
{
SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
@@ -6701,7 +6734,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
- parse->groupClause,
+ root->processed_groupClause,
havingQual,
agg_costs,
dNumGroups));
@@ -6716,7 +6749,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
create_group_path(root,
grouped_rel,
path,
- parse->groupClause,
+ root->processed_groupClause,
havingQual,
dNumGroups));
}
@@ -6785,7 +6818,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
- parse->groupClause,
+ root->processed_groupClause,
havingQual,
agg_final_costs,
dNumGroups));
@@ -6794,7 +6827,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
create_group_path(root,
grouped_rel,
path,
- parse->groupClause,
+ root->processed_groupClause,
havingQual,
dNumGroups));
@@ -6825,7 +6858,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel->reltarget,
AGG_HASHED,
AGGSPLIT_SIMPLE,
- parse->groupClause,
+ root->processed_groupClause,
havingQual,
agg_costs,
dNumGroups));
@@ -6846,7 +6879,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel->reltarget,
AGG_HASHED,
AGGSPLIT_FINAL_DESERIAL,
- parse->groupClause,
+ root->processed_groupClause,
havingQual,
agg_final_costs,
dNumGroups));
@@ -7048,7 +7081,7 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
- parse->groupClause,
+ root->processed_groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
@@ -7057,7 +7090,7 @@ create_partial_grouping_paths(PlannerInfo *root,
create_group_path(root,
partially_grouped_rel,
path,
- parse->groupClause,
+ root->processed_groupClause,
NIL,
dNumPartialGroups));
}
@@ -7117,7 +7150,7 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
- parse->groupClause,
+ root->processed_groupClause,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
@@ -7126,7 +7159,7 @@ create_partial_grouping_paths(PlannerInfo *root,
create_group_path(root,
partially_grouped_rel,
path,
- parse->groupClause,
+ root->processed_groupClause,
NIL,
dNumPartialPartialGroups));
}
@@ -7147,7 +7180,7 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel->reltarget,
AGG_HASHED,
AGGSPLIT_INITIAL_SERIAL,
- parse->groupClause,
+ root->processed_groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
@@ -7165,7 +7198,7 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel->reltarget,
AGG_HASHED,
AGGSPLIT_INITIAL_SERIAL,
- parse->groupClause,
+ root->processed_groupClause,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 4cec12ab19..51d545bbd6 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1008,6 +1008,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
subroot->rowMarks = NIL;
memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels));
memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets));
+ subroot->processed_groupClause = NIL;
subroot->processed_tlist = NIL;
subroot->update_colnos = NIL;
subroot->grouping_map = NULL;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 654dba61aa..11c3545637 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -406,6 +406,22 @@ struct PlannerInfo
/* Result tlists chosen by grouping_planner for upper-stage processing */
struct PathTarget *upper_targets[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore);
+ /*
+ * The fully-processed groupClause is kept here. It differs from
+ * parse->groupClause in that we remove any items that we can prove
+ * redundant, so that only the columns named here actually need to be
+ * compared to determine grouping. Note that it's possible for *all* the
+ * items to be proven redundant, implying that there is only one group
+ * containing all the query's rows. Hence, if you want to check whether
+ * GROUP BY was specified, test for nonempty parse->groupClause, not for
+ * nonempty processed_groupClause.
+ *
+ * Currently, when grouping sets are specified we do not attempt to
+ * optimize the groupClause, so that processed_groupClause will be
+ * identical to parse->groupClause.
+ */
+ List *processed_groupClause;
+
/*
* The fully-processed targetlist is kept here. It differs from
* parse->targetList in that (for INSERT) it's been reordered to match the
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 41f765d342..7719f89122 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -228,6 +228,11 @@ extern List *build_join_pathkeys(PlannerInfo *root,
extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
List *sortclauses,
List *tlist);
+extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
+ List **sortclauses,
+ List *tlist,
+ bool remove_redundant,
+ bool *sortable);
extern void initialize_mergeclause_eclasses(PlannerInfo *root,
RestrictInfo *restrictinfo);
extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 309c2dc865..648da0f7c4 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1289,7 +1289,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
QUERY PLAN
------------------------------------------------------
HashAggregate
- Group Key: t1.a, t1.b, t2.x, t2.y
+ Group Key: t1.a, t1.b
-> Hash Join
Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-> Seq Scan on t2
@@ -1304,7 +1304,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
QUERY PLAN
------------------------------------------------------
HashAggregate
- Group Key: t1.a, t1.b, t2.x, t2.z
+ Group Key: t1.a, t1.b, t2.z
-> Hash Join
Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-> Seq Scan on t2
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index fcad5c4093..292196b169 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -2135,7 +2135,6 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
QUERY PLAN
---------------------------
GroupAggregate
- Group Key: $2
InitPlan 1 (returns $1)
-> Result
InitPlan 3 (returns $2)
@@ -2143,7 +2142,7 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
-> Result
SubPlan 2
-> Result
-(9 rows)
+(8 rows)
select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
grouping
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index a82b8fb8fb..1b900fddf8 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -164,10 +164,9 @@ SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c;
QUERY PLAN
--------------------------------
GroupAggregate
- Group Key: c
-> Result
One-Time Filter: false
-(4 rows)
+(3 rows)
SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c;
c | sum
@@ -805,10 +804,10 @@ SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOI
-- Empty join relation because of empty outer side, no partitionwise agg plan
EXPLAIN (COSTS OFF)
SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2;
- QUERY PLAN
----------------------------------------
+ QUERY PLAN
+--------------------------------------
GroupAggregate
- Group Key: pagg_tab1.x, pagg_tab1.y
+ Group Key: pagg_tab1.y
-> Sort
Sort Key: pagg_tab1.y
-> Result
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index c59caf1cb3..f2418413dd 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1340,7 +1340,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
QUERY PLAN
--------------------------------------------------------------------------------
GroupAggregate
- Group Key: t1.c, t2.c, t3.c
+ Group Key: t1.c, t3.c
-> Sort
Sort Key: t1.c, t3.c
-> Append
@@ -1484,7 +1484,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
QUERY PLAN
--------------------------------------------------------------------------------
GroupAggregate
- Group Key: t1.c, t2.c, t3.c
+ Group Key: t1.c, t3.c
-> Sort
Sort Key: t1.c, t3.c
-> Append
@@ -1576,7 +1576,7 @@ SELECT avg(t1.a), avg(t2.b), t1.c, t2.c FROM plt1 t1 RIGHT JOIN plt2 t2 ON t1.c
Sort
Sort Key: t1.c
-> HashAggregate
- Group Key: t1.c, t2.c
+ Group Key: t1.c
-> Append
-> Hash Join
Hash Cond: (t2_1.c = t1_1.c)