Re: Removing redundant grouping columns

Tom Lane Tue, 27 Dec 2022 15:25:01 -0800

I wrote:
> This patch is aimed at being smarter about cases where we have
> redundant GROUP BY entries, for example
> SELECT ... WHERE a.x = b.y GROUP BY a.x, b.y;


The cfbot didn't like this, because of a variable that wasn't
used in non-assert builds.  Fixed in v2.

                        regards, tom lane

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 9524765650..450f724c49 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -3667,6 +3667,13 @@ appendGroupByClause(List *tlist, deparse_expr_cxt *context)
 	 */
 	Assert(!query->groupingSets);
 
+	/*
+	 * We intentionally print query->groupClause not processed_groupClause,
+	 * leaving it to the remote planner to get rid of any redundant GROUP BY
+	 * items again.  This is necessary in case processed_groupClause reduced
+	 * to empty, and in any case the redundancy situation on the remote might
+	 * be different than what we think here.
+	 */
 	foreach(lc, query->groupClause)
 	{
 		SortGroupClause *grp = (SortGroupClause *) lfirst(lc);
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 1a2c2a665c..befb2fd4d3 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2864,16 +2864,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN                                       
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)
 
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z  
@@ -2990,14 +2987,13 @@ select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
                     QUERY PLAN                     
 ---------------------------------------------------
  GroupAggregate
-   Output: ($0), sum(ft1.c1)
-   Group Key: $0
+   Output: $0, sum(ft1.c1)
    InitPlan 1 (returns $0)
      ->  Seq Scan on pg_catalog.pg_enum
    ->  Foreign Scan on public.ft1
-         Output: $0, ft1.c1
+         Output: ft1.c1
          Remote SQL: SELECT "C 1" FROM "S 1"."T 1"
-(8 rows)
+(7 rows)
 
 select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
  exists |  sum   
@@ -3382,14 +3378,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
 --------------------------------------------------------------------------------------------------
  GroupAggregate
    Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
-   Group Key: ft2.c2
    ->  Sort
-         Output: c2, c1
+         Output: c1, c2
          Sort Key: ft2.c1 USING <^
          ->  Foreign Scan on public.ft2
-               Output: c2, c1
+               Output: c1, c2
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(9 rows)
+(8 rows)
 
 -- This should not be pushed either.
 explain (verbose, costs off)
@@ -3458,14 +3453,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
 --------------------------------------------------------------------------------------------------
  GroupAggregate
    Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
-   Group Key: ft2.c2
    ->  Sort
-         Output: c2, c1
+         Output: c1, c2
          Sort Key: ft2.c1 USING <^
          ->  Foreign Scan on public.ft2
-               Output: c2, c1
+               Output: c1, c2
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(9 rows)
+(8 rows)
 
 -- Cleanup
 drop operator class my_op_class using btree;
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index b9268e32dd..722ebc932b 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -3345,9 +3345,9 @@ estimate_path_cost_size(PlannerInfo *root,
 			}
 
 			/* Get number of grouping columns and possible number of groups */
-			numGroupCols = list_length(root->parse->groupClause);
+			numGroupCols = list_length(root->processed_groupClause);
 			numGroups = estimate_num_groups(root,
-											get_sortgrouplist_exprs(root->parse->groupClause,
+											get_sortgrouplist_exprs(root->processed_groupClause,
 																	fpinfo->grouped_tlist),
 											input_rows, NULL, NULL);
 
@@ -3636,7 +3636,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root,
 	 * pathkeys, adjust the costs with that function.  Otherwise, adjust the
 	 * costs by applying the same heuristic as for the scan or join case.
 	 */
-	if (!grouping_is_sortable(root->parse->groupClause) ||
+	if (!grouping_is_sortable(root->processed_groupClause) ||
 		!pathkeys_contained_in(pathkeys, root->group_pathkeys))
 	{
 		Path		sort_path;	/* dummy for result of cost_sort */
@@ -6202,7 +6202,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
 		Index		sgref = get_pathtarget_sortgroupref(grouping_target, i);
 		ListCell   *l;
 
-		/* Check whether this expression is part of GROUP BY clause */
+		/*
+		 * Check whether this expression is part of GROUP BY clause.  Note we
+		 * check the whole GROUP BY clause not just processed_groupClause,
+		 * because we will ship all of it, cf. appendGroupByClause.
+		 */
 		if (sgref && get_sortgroupref_clause_noerr(sgref, query->groupClause))
 		{
 			TargetEntry *tle;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 30c9143183..a8a5cb6ef8 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2476,7 +2476,7 @@ agg_retrieve_direct(AggState *aggstate)
 					 * If we are grouping, check whether we've crossed a group
 					 * boundary.
 					 */
-					if (node->aggstrategy != AGG_PLAIN)
+					if (node->aggstrategy != AGG_PLAIN && node->numCols > 0)
 					{
 						tmpcontext->ecxt_innertuple = firstSlot;
 						if (!ExecQual(aggstate->phase->eqfunctions[node->numCols - 1],
@@ -3480,8 +3480,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 			 */
 			if (aggnode->aggstrategy == AGG_SORTED)
 			{
-				Assert(aggnode->numCols > 0);
-
 				/*
 				 * Build a separate function for each subset of columns that
 				 * need to be compared.
@@ -3507,7 +3505,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 				}
 
 				/* and for all grouped columns, unless already computed */
-				if (phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
+				if (aggnode->numCols > 0 &&
+					phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
 				{
 					phasedata->eqfunctions[aggnode->numCols - 1] =
 						execTuplesMatchPrepare(scanDesc,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index a9943cd6e0..bb11dbec7b 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1152,18 +1152,62 @@ List *
 make_pathkeys_for_sortclauses(PlannerInfo *root,
 							  List *sortclauses,
 							  List *tlist)
+{
+	List	   *result;
+	bool		sortable;
+
+	result = make_pathkeys_for_sortclauses_extended(root,
+													&sortclauses,
+													tlist,
+													false,
+													&sortable);
+	/* It's caller error if not all clauses were sortable */
+	Assert(sortable);
+	return result;
+}
+
+/*
+ * make_pathkeys_for_sortclauses_extended
+ *		Generate a pathkeys list that represents the sort order specified
+ *		by a list of SortGroupClauses
+ *
+ * The comments for make_pathkeys_for_sortclauses apply here too. In addition:
+ *
+ * If remove_redundant is true, then any sort clauses that are found to
+ * give rise to redundant pathkeys are removed from the sortclauses list
+ * (which therefore must be pass-by-reference in this version).
+ *
+ * *sortable is set to true if all the sort clauses are in fact sortable.
+ * If any are not, they are ignored except for setting *sortable false.
+ * (In that case, the output pathkey list isn't really useful.  However,
+ * we process the whole sortclauses list anyway, because it's still valid
+ * to remove any clauses that can be proven redundant via the eclass logic.
+ * Even though we'll have to hash in that case, we might as well not hash
+ * redundant columns.)
+ */
+List *
+make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
+									   List **sortclauses,
+									   List *tlist,
+									   bool remove_redundant,
+									   bool *sortable)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;
 
-	foreach(l, sortclauses)
+	*sortable = true;
+	foreach(l, *sortclauses)
 	{
 		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		Expr	   *sortkey;
 		PathKey    *pathkey;
 
 		sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
-		Assert(OidIsValid(sortcl->sortop));
+		if (!OidIsValid(sortcl->sortop))
+		{
+			*sortable = false;
+			continue;
+		}
 		pathkey = make_pathkey_from_sortop(root,
 										   sortkey,
 										   root->nullable_baserels,
@@ -1175,6 +1219,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 		/* Canonical form eliminates redundant ordering keys */
 		if (!pathkey_is_redundant(pathkey, pathkeys))
 			pathkeys = lappend(pathkeys, pathkey);
+		else if (remove_redundant)
+			*sortclauses = foreach_delete_current(*sortclauses, l);
 	}
 	return pathkeys;
 }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 66139928e8..a4c6062b41 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -2404,7 +2404,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 	 * sizing.
 	 */
 	maxref = 0;
-	foreach(lc, root->parse->groupClause)
+	foreach(lc, root->processed_groupClause)
 	{
 		SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
 
@@ -2415,7 +2415,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 	grouping_map = (AttrNumber *) palloc0((maxref + 1) * sizeof(AttrNumber));
 
 	/* Now look up the column numbers in the child's tlist */
-	foreach(lc, root->parse->groupClause)
+	foreach(lc, root->processed_groupClause)
 	{
 		SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
 		TargetEntry *tle = get_sortgroupclause_tle(gc, subplan->targetlist);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 8a41e1e6d3..bcc10942cc 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -95,17 +95,9 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL;
 #define EXPRKIND_TABLEFUNC			11
 #define EXPRKIND_TABLEFUNC_LATERAL	12
 
-/* Passthrough data for standard_qp_callback */
-typedef struct
-{
-	List	   *activeWindows;	/* active windows, if any */
-	List	   *groupClause;	/* overrides parse->groupClause */
-} standard_qp_extra;
-
 /*
  * Data specific to grouping sets
  */
-
 typedef struct
 {
 	List	   *rollups;
@@ -129,6 +121,13 @@ typedef struct
 								 * clauses per Window */
 } WindowClauseSortData;
 
+/* Passthrough data for standard_qp_callback */
+typedef struct
+{
+	List	   *activeWindows;	/* active windows, if any */
+	grouping_sets_data *gset_data;	/* grouping sets data, if any */
+} standard_qp_extra;
+
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -636,6 +635,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	root->rowMarks = NIL;
 	memset(root->upper_rels, 0, sizeof(root->upper_rels));
 	memset(root->upper_targets, 0, sizeof(root->upper_targets));
+	root->processed_groupClause = NIL;
 	root->processed_tlist = NIL;
 	root->update_colnos = NIL;
 	root->grouping_map = NULL;
@@ -1032,9 +1032,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	}
 	parse->havingQual = (Node *) newHaving;
 
-	/* Remove any redundant GROUP BY columns */
-	remove_useless_groupby_columns(root);
-
 	/*
 	 * If we have any outer joins, try to reduce them to plain inner joins.
 	 * This step is most easily done after we've done expression
@@ -1393,11 +1390,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		{
 			gset_data = preprocess_grouping_sets(root);
 		}
-		else
+		else if (parse->groupClause)
 		{
 			/* Preprocess regular GROUP BY clause, if any */
-			if (parse->groupClause)
-				parse->groupClause = preprocess_groupclause(root, NIL);
+			root->processed_groupClause = preprocess_groupclause(root, NIL);
+			/* Remove any redundant GROUP BY columns */
+			remove_useless_groupby_columns(root);
 		}
 
 		/*
@@ -1475,9 +1473,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 		/* Set up data needed by standard_qp_callback */
 		qp_extra.activeWindows = activeWindows;
-		qp_extra.groupClause = (gset_data
-								? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
-								: parse->groupClause);
+		qp_extra.gset_data = gset_data;
 
 		/*
 		 * Generate the best unsorted and presorted paths for the scan/join
@@ -2011,6 +2007,12 @@ preprocess_grouping_sets(PlannerInfo *root)
 	gd->unsortable_refs = NULL;
 	gd->unsortable_sets = NIL;
 
+	/*
+	 * We don't currently make any attempt to optimize the groupClause when
+	 * there are grouping sets, so just duplicate it in processed_groupClause.
+	 */
+	root->processed_groupClause = parse->groupClause;
+
 	if (parse->groupClause)
 	{
 		ListCell   *lc;
@@ -2638,7 +2640,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 	int			relid;
 
 	/* No chance to do anything if there are less than two GROUP BY items */
-	if (list_length(parse->groupClause) < 2)
+	if (list_length(root->processed_groupClause) < 2)
 		return;
 
 	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
@@ -2652,7 +2654,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 	 */
 	groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
 										   (list_length(parse->rtable) + 1));
-	foreach(lc, parse->groupClause)
+	foreach(lc, root->processed_groupClause)
 	{
 		SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
 		TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2747,7 +2749,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 	{
 		List	   *new_groupby = NIL;
 
-		foreach(lc, parse->groupClause)
+		foreach(lc, root->processed_groupClause)
 		{
 			SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
 			TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2764,7 +2766,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 				new_groupby = lappend(new_groupby, sgc);
 		}
 
-		parse->groupClause = new_groupby;
+		root->processed_groupClause = new_groupby;
 	}
 }
 
@@ -2784,6 +2786,10 @@ remove_useless_groupby_columns(PlannerInfo *root)
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
  *
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause.  This is important
+ * because later processing may modify the processed_groupClause list.
+ *
  * For grouping sets, the order of items is instead forced to agree with that
  * of the grouping set (and items not in the grouping set are skipped). The
  * work of sorting the order of grouping set elements to match the ORDER BY if
@@ -2814,7 +2820,7 @@ preprocess_groupclause(PlannerInfo *root, List *force)
 
 	/* If no ORDER BY, nothing useful to do here */
 	if (parse->sortClause == NIL)
-		return parse->groupClause;
+		return list_copy(parse->groupClause);
 
 	/*
 	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
@@ -2845,7 +2851,7 @@ preprocess_groupclause(PlannerInfo *root, List *force)
 
 	/* If no match at all, no point in reordering GROUP BY */
 	if (new_groupclause == NIL)
-		return parse->groupClause;
+		return list_copy(parse->groupClause);
 
 	/*
 	 * Add any remaining GROUP BY items to the new list, but only if we were
@@ -2861,10 +2867,10 @@ preprocess_groupclause(PlannerInfo *root, List *force)
 
 		if (list_member_ptr(new_groupclause, gc))
 			continue;			/* it matched an ORDER BY item */
-		if (partial_match)
-			return parse->groupClause;	/* give up, no common sort possible */
-		if (!OidIsValid(gc->sortop))
-			return parse->groupClause;	/* give up, GROUP BY can't be sorted */
+		if (partial_match)		/* give up, no common sort possible */
+			return list_copy(parse->groupClause);
+		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
+			return list_copy(parse->groupClause);
 		new_groupclause = lappend(new_groupclause, gc);
 	}
 
@@ -3148,64 +3154,39 @@ reorder_grouping_sets(List *groupingSets, List *sortclause)
 }
 
 /*
- * make_pathkeys_for_groupagg
- *		Determine the pathkeys for the GROUP BY clause and/or any ordered
- *		aggregates.  We expect at least one of these here.
+ * adjust_group_pathkeys_for_groupagg
+ *		Add pathkeys to root->group_pathkeys to reflect the best set of
+ *		pre-ordered input for ordered aggregates.
  *
- * Building the pathkeys for the GROUP BY is simple.  Most of the complexity
- * involved here comes from calculating the best pathkeys for ordered
- * aggregates.  We define "best" as the pathkeys that suit the most number of
+ * We define "best" as the pathkeys that suit the largest number of
  * aggregate functions.  We find these by looking at the first ORDER BY /
  * DISTINCT aggregate and take the pathkeys for that before searching for
  * other aggregates that require the same or a more strict variation of the
  * same pathkeys.  We then repeat that process for any remaining aggregates
  * with different pathkeys and if we find another set of pathkeys that suits a
- * larger number of aggregates then we return those pathkeys instead.
- *
- * *number_groupby_pathkeys gets set to the number of elements in the returned
- * list that belong to the GROUP BY clause.  Any elements above this number
- * must belong to ORDER BY / DISTINCT aggregates.
+ * larger number of aggregates then we select those pathkeys instead.
  *
  * When the best pathkeys are found we also mark each Aggref that can use
  * those pathkeys as aggpresorted = true.
  */
-static List *
-make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
-						   int *number_groupby_pathkeys)
+static void
+adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
 {
-	List	   *grouppathkeys = NIL;
+	List	   *grouppathkeys = root->group_pathkeys;
 	List	   *bestpathkeys;
 	Bitmapset  *bestaggs;
 	Bitmapset  *unprocessed_aggs;
 	ListCell   *lc;
 	int			i;
 
-	Assert(groupClause != NIL || root->numOrderedAggs > 0);
-
-	if (groupClause != NIL)
-	{
-		/* no pathkeys possible if there's an unsortable GROUP BY */
-		if (!grouping_is_sortable(groupClause))
-		{
-			*number_groupby_pathkeys = 0;
-			return NIL;
-		}
-
-		grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
-													  tlist);
-		*number_groupby_pathkeys = list_length(grouppathkeys);
-	}
-	else
-		*number_groupby_pathkeys = 0;
+	/* Shouldn't be here if there are grouping sets */
+	Assert(root->parse->groupingSets == NIL);
+	/* Shouldn't be here unless there are some ordered aggregates */
+	Assert(root->numOrderedAggs > 0);
 
-	/*
-	 * We can't add pathkeys for ordered aggregates if there are any grouping
-	 * sets.  All handling specific to ordered aggregates must be done by the
-	 * executor in that case.
-	 */
-	if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL ||
-		!enable_presorted_aggregate)
-		return grouppathkeys;
+	/* Do nothing if disabled */
+	if (!enable_presorted_aggregate)
+		return;
 
 	/*
 	 * Make a first pass over all AggInfos to collect a Bitmapset containing
@@ -3327,6 +3308,14 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
 		}
 	}
 
+	/*
+	 * If we found any ordered aggregates, update root->group_pathkeys to add
+	 * the best set of aggregate pathkeys.  Note that bestpathkeys includes
+	 * the original GROUP BY pathkeys already.
+	 */
+	if (bestpathkeys != NIL)
+		root->group_pathkeys = bestpathkeys;
+
 	/*
 	 * Now that we've found the best set of aggregates we can set the
 	 * presorted flag to indicate to the executor that it needn't bother
@@ -3347,16 +3336,6 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
 			aggref->aggpresorted = true;
 		}
 	}
-
-	/*
-	 * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
-	 * aggregates, then return bestpathkeys, otherwise return the
-	 * grouppathkeys.
-	 */
-	if (bestpathkeys != NIL)
-		return bestpathkeys;
-
-	return grouppathkeys;
 }
 
 /*
@@ -3374,11 +3353,62 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 	 * Calculate pathkeys that represent grouping/ordering and/or ordered
 	 * aggregate requirements.
 	 */
-	if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
-		root->group_pathkeys = make_pathkeys_for_groupagg(root,
-														  qp_extra->groupClause,
-														  tlist,
-														  &root->num_groupby_pathkeys);
+	if (qp_extra->gset_data)
+	{
+		/*
+		 * With grouping sets, just use the first RollupData's groupClause. We
+		 * don't make any effort to optimize grouping clauses when there are
+		 * grouping sets, nor can we combine aggregate ordering keys with
+		 * grouping.
+		 */
+		List	   *rollups = qp_extra->gset_data->rollups;
+		List	   *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
+
+		if (grouping_is_sortable(groupClause))
+		{
+			root->group_pathkeys = make_pathkeys_for_sortclauses(root,
+																 groupClause,
+																 tlist);
+			root->num_groupby_pathkeys = list_length(root->group_pathkeys);
+		}
+		else
+		{
+			root->group_pathkeys = NIL;
+			root->num_groupby_pathkeys = 0;
+		}
+	}
+	else if (parse->groupClause || root->numOrderedAggs > 0)
+	{
+		/*
+		 * With a plain GROUP BY list, we can remove any grouping items that
+		 * are proven redundant by EquivalenceClass processing.  For example,
+		 * we can remove y given "WHERE x = y GROUP BY x, y".  These aren't
+		 * especially common cases, but they're nearly free to detect.  Note
+		 * that we remove redundant items from processed_groupClause but not
+		 * the original parse->groupClause.
+		 */
+		bool		sortable;
+
+		root->group_pathkeys =
+			make_pathkeys_for_sortclauses_extended(root,
+												   &root->processed_groupClause,
+												   tlist,
+												   true,
+												   &sortable);
+		if (!sortable)
+		{
+			/* Can't sort; no point in considering aggregate ordering either */
+			root->group_pathkeys = NIL;
+			root->num_groupby_pathkeys = 0;
+		}
+		else
+		{
+			root->num_groupby_pathkeys = list_length(root->group_pathkeys);
+			/* If we have ordered aggs, consider adding onto group_pathkeys */
+			if (root->numOrderedAggs > 0)
+				adjust_group_pathkeys_for_groupagg(root);
+		}
+	}
 	else
 	{
 		root->group_pathkeys = NIL;
@@ -3531,8 +3561,8 @@ get_number_of_groups(PlannerInfo *root,
 		}
 		else
 		{
-			/* Plain GROUP BY */
-			groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+			/* Plain GROUP BY -- estimate based on optimized groupClause */
+			groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
 												 target_list);
 
 			dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
@@ -3610,8 +3640,8 @@ create_grouping_paths(PlannerInfo *root,
 
 		/*
 		 * Determine whether it's possible to perform sort-based
-		 * implementations of grouping.  (Note that if groupClause is empty,
-		 * grouping_is_sortable() is trivially true, and all the
+		 * implementations of grouping.  (Note that if processed_groupClause
+		 * is empty, grouping_is_sortable() is trivially true, and all the
 		 * pathkeys_contained_in() tests will succeed too, so that we'll
 		 * consider every surviving input path.)
 		 *
@@ -3620,7 +3650,7 @@ create_grouping_paths(PlannerInfo *root,
 		 * must consider any sorted-input plan.
 		 */
 		if ((gd && gd->rollups != NIL)
-			|| grouping_is_sortable(parse->groupClause))
+			|| grouping_is_sortable(root->processed_groupClause))
 			flags |= GROUPING_CAN_USE_SORT;
 
 		/*
@@ -3645,7 +3675,7 @@ create_grouping_paths(PlannerInfo *root,
 		 */
 		if ((parse->groupClause != NIL &&
 			 root->numOrderedAggs == 0 &&
-			 (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
+			 (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
 			flags |= GROUPING_CAN_USE_HASH;
 
 		/*
@@ -3856,6 +3886,9 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 		 * partial partitionwise aggregation.  But if partial aggregation is
 		 * not supported in general then we can't use it for partitionwise
 		 * aggregation either.
+		 *
+		 * Check parse->groupClause not processed_groupClause, because it's
+		 * okay if some of the partitioning columns were proved redundant.
 		 */
 		if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
 			group_by_has_partkey(input_rel, extra->targetList,
@@ -5214,8 +5247,9 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
 		Expr	   *expr = (Expr *) lfirst(lc);
 		Index		sgref = get_pathtarget_sortgroupref(final_target, i);
 
-		if (sgref && parse->groupClause &&
-			get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+		if (sgref && root->processed_groupClause &&
+			get_sortgroupref_clause_noerr(sgref,
+										  root->processed_groupClause) != NULL)
 		{
 			/*
 			 * It's a grouping column, so add it to the input target as-is.
@@ -5283,7 +5317,6 @@ make_partial_grouping_target(PlannerInfo *root,
 							 PathTarget *grouping_target,
 							 Node *havingQual)
 {
-	Query	   *parse = root->parse;
 	PathTarget *partial_target;
 	List	   *non_group_cols;
 	List	   *non_group_exprs;
@@ -5299,8 +5332,9 @@ make_partial_grouping_target(PlannerInfo *root,
 		Expr	   *expr = (Expr *) lfirst(lc);
 		Index		sgref = get_pathtarget_sortgroupref(grouping_target, i);
 
-		if (sgref && parse->groupClause &&
-			get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+		if (sgref && root->processed_groupClause &&
+			get_sortgroupref_clause_noerr(sgref,
+										  root->processed_groupClause) != NULL)
 		{
 			/*
 			 * It's a grouping column, so add it to the partial_target as-is.
@@ -5747,7 +5781,6 @@ make_window_input_target(PlannerInfo *root,
 						 PathTarget *final_target,
 						 List *activeWindows)
 {
-	Query	   *parse = root->parse;
 	PathTarget *input_target;
 	Bitmapset  *sgrefs;
 	List	   *flattenable_cols;
@@ -5755,7 +5788,7 @@ make_window_input_target(PlannerInfo *root,
 	int			i;
 	ListCell   *lc;
 
-	Assert(parse->hasWindowFuncs);
+	Assert(root->parse->hasWindowFuncs);
 
 	/*
 	 * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
@@ -5782,7 +5815,7 @@ make_window_input_target(PlannerInfo *root,
 	}
 
 	/* Add in sortgroupref numbers of GROUP BY clauses, too */
-	foreach(lc, parse->groupClause)
+	foreach(lc, root->processed_groupClause)
 	{
 		SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
 
@@ -6701,7 +6734,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
+										 root->processed_groupClause,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
@@ -6716,7 +6749,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   parse->groupClause,
+										   root->processed_groupClause,
 										   havingQual,
 										   dNumGroups));
 			}
@@ -6785,7 +6818,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 root->processed_groupClause,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6794,7 +6827,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   root->processed_groupClause,
 											   havingQual,
 											   dNumGroups));
 
@@ -6825,7 +6858,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 									 grouped_rel->reltarget,
 									 AGG_HASHED,
 									 AGGSPLIT_SIMPLE,
-									 parse->groupClause,
+									 root->processed_groupClause,
 									 havingQual,
 									 agg_costs,
 									 dNumGroups));
@@ -6846,7 +6879,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 									 grouped_rel->reltarget,
 									 AGG_HASHED,
 									 AGGSPLIT_FINAL_DESERIAL,
-									 parse->groupClause,
+									 root->processed_groupClause,
 									 havingQual,
 									 agg_final_costs,
 									 dNumGroups));
@@ -7048,7 +7081,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 parse->groupClause,
+										 root->processed_groupClause,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
@@ -7057,7 +7090,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 						 create_group_path(root,
 										   partially_grouped_rel,
 										   path,
-										   parse->groupClause,
+										   root->processed_groupClause,
 										   NIL,
 										   dNumPartialGroups));
 		}
@@ -7117,7 +7150,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
+												 root->processed_groupClause,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
@@ -7126,7 +7159,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   parse->groupClause,
+												   root->processed_groupClause,
 												   NIL,
 												   dNumPartialPartialGroups));
 		}
@@ -7147,7 +7180,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 								 partially_grouped_rel->reltarget,
 								 AGG_HASHED,
 								 AGGSPLIT_INITIAL_SERIAL,
-								 parse->groupClause,
+								 root->processed_groupClause,
 								 NIL,
 								 agg_partial_costs,
 								 dNumPartialGroups));
@@ -7165,7 +7198,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 										 partially_grouped_rel->reltarget,
 										 AGG_HASHED,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 parse->groupClause,
+										 root->processed_groupClause,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialPartialGroups));
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 4cec12ab19..51d545bbd6 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1008,6 +1008,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	subroot->rowMarks = NIL;
 	memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels));
 	memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets));
+	subroot->processed_groupClause = NIL;
 	subroot->processed_tlist = NIL;
 	subroot->update_colnos = NIL;
 	subroot->grouping_map = NULL;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 654dba61aa..11c3545637 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -406,6 +406,22 @@ struct PlannerInfo
 	/* Result tlists chosen by grouping_planner for upper-stage processing */
 	struct PathTarget *upper_targets[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore);
 
+	/*
+	 * The fully-processed groupClause is kept here.  It differs from
+	 * parse->groupClause in that we remove any items that we can prove
+	 * redundant, so that only the columns named here actually need to be
+	 * compared to determine grouping.  Note that it's possible for *all* the
+	 * items to be proven redundant, implying that there is only one group
+	 * containing all the query's rows.  Hence, if you want to check whether
+	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
+	 * nonempty processed_groupClause.
+	 *
+	 * Currently, when grouping sets are specified we do not attempt to
+	 * optimize the groupClause, so that processed_groupClause will be
+	 * identical to parse->groupClause.
+	 */
+	List	   *processed_groupClause;
+
 	/*
 	 * The fully-processed targetlist is kept here.  It differs from
 	 * parse->targetList in that (for INSERT) it's been reordered to match the
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 41f765d342..7719f89122 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -228,6 +228,11 @@ extern List *build_join_pathkeys(PlannerInfo *root,
 extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
 										   List *sortclauses,
 										   List *tlist);
+extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
+													List **sortclauses,
+													List *tlist,
+													bool remove_redundant,
+													bool *sortable);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 309c2dc865..648da0f7c4 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1289,7 +1289,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
                       QUERY PLAN                      
 ------------------------------------------------------
  HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.y
+   Group Key: t1.a, t1.b
    ->  Hash Join
          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
          ->  Seq Scan on t2
@@ -1304,7 +1304,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
                       QUERY PLAN                      
 ------------------------------------------------------
  HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.z
+   Group Key: t1.a, t1.b, t2.z
    ->  Hash Join
          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
          ->  Seq Scan on t2
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index fcad5c4093..292196b169 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -2135,7 +2135,6 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
         QUERY PLAN         
 ---------------------------
  GroupAggregate
-   Group Key: $2
    InitPlan 1 (returns $1)
      ->  Result
    InitPlan 3 (returns $2)
@@ -2143,7 +2142,7 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
    ->  Result
    SubPlan 2
      ->  Result
-(9 rows)
+(8 rows)
 
 select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
  grouping 
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index a82b8fb8fb..1b900fddf8 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -164,10 +164,9 @@ SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c;
            QUERY PLAN           
 --------------------------------
  GroupAggregate
-   Group Key: c
    ->  Result
          One-Time Filter: false
-(4 rows)
+(3 rows)
 
 SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c;
  c | sum 
@@ -805,10 +804,10 @@ SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOI
 -- Empty join relation because of empty outer side, no partitionwise agg plan
 EXPLAIN (COSTS OFF)
 SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2;
-              QUERY PLAN               
----------------------------------------
+              QUERY PLAN              
+--------------------------------------
  GroupAggregate
-   Group Key: pagg_tab1.x, pagg_tab1.y
+   Group Key: pagg_tab1.y
    ->  Sort
          Sort Key: pagg_tab1.y
          ->  Result
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index c59caf1cb3..f2418413dd 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1340,7 +1340,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1484,7 +1484,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1576,7 +1576,7 @@ SELECT avg(t1.a), avg(t2.b), t1.c, t2.c FROM plt1 t1 RIGHT JOIN plt2 t2 ON t1.c
  Sort
    Sort Key: t1.c
    ->  HashAggregate
-         Group Key: t1.c, t2.c
+         Group Key: t1.c
          ->  Append
                ->  Hash Join
                      Hash Cond: (t2_1.c = t1_1.c)

Re: Removing redundant grouping columns

Reply via email to