On 4/12/24 06:44, Tom Lane wrote:
* Speaking of pathnodes.h, PathKeyInfo is a pretty uninformative node
type name, and the comments provided for it are not going to educate
anybody.  What is the "association" between the pathkeys and clauses?
I guess the clauses are supposed to be SortGroupClauses, but not all
pathkeys match up to SortGroupClauses, so what then?  This is very
underspecified, and fuzzy thinking about data structures tends to lead
to bugs.
I'm not the best in English documentation and naming style. So, feel free to check my version.

So I'm quite afraid that there are still bugs lurking here.
What's more, given that the plans this patch makes apparently
seldom win when you don't put a thumb on the scales, it might
take a very long time to isolate those bugs.  If the patch
produces a faulty plan (e.g. not sorted properly) we'd never
notice if that plan isn't chosen, and even if it is chosen
it probably wouldn't produce anything as obviously wrong as
a crash.
I added checkings on the proper order of pathkeys and clauses.
If you really care about that, we should spend additional time and rewrite the code to generate an order of clauses somewhere in the plan creation phase. For example, during the create_group_plan call, we could use the processed_groupClause list, cycle through subpath->pathkeys, set the order, and detect whether the pathkeys list corresponds to the group-by or is enough to build a grouping plan.
Anyway, make this part of code more resistant to blunders is another story.

--
regards,
Andrei Lepikhov
Postgres Professional
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index aa80f6486c..9c079270ec 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -461,7 +461,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 /*
  * pathkeys_are_duplicate
  *		Check if give pathkeys are already contained the list of
- *		PathKeyInfo's.
+ *		GroupByOrdering's.
  */
 static bool
 pathkeys_are_duplicate(List *infos, List *pathkeys)
@@ -470,7 +470,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 
 	foreach(lc, infos)
 	{
-		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
 
 		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
 			return true;
@@ -482,7 +482,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
- * Returns a list of PathKeyInfo items, each representing an interesting
+ * Returns a list of GroupByOrdering items, each representing an interesting
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
@@ -495,15 +495,15 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
-	Query		   *parse = root->parse;
-	List		   *infos = NIL;
-	PathKeyInfo	   *info;
+	Query			   *parse = root->parse;
+	List			   *infos = NIL;
+	GroupByOrdering	   *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
 
 	/* always return at least the original pathkeys/clauses */
-	info = makeNode(PathKeyInfo);
+	info = makeNode(GroupByOrdering);
 	info->pathkeys = pathkeys;
 	info->clauses = clauses;
 	infos = lappend(infos, info);
@@ -539,7 +539,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -564,7 +564,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -574,18 +574,29 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 #ifdef USE_ASSERT_CHECKING
 	{
-		PathKeyInfo	   *pinfo = linitial_node(PathKeyInfo, infos);
+		GroupByOrdering	   *pinfo = linitial_node(GroupByOrdering, infos);
 		ListCell	   *lc;
 
 		/* Test consistency of info structures */
 		for_each_from(lc, infos, 1)
 		{
-			info = lfirst_node(PathKeyInfo, lc);
+			ListCell *lc1, *lc2;
+
+			info = lfirst_node(GroupByOrdering, lc);
 
 			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
 			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
 			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
 			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+
+			forboth(lc1, info->clauses, lc2, info->pathkeys)
+			{
+				SortGroupClause *sgc = lfirst_node(SortGroupClause, lc1);
+				PathKey *pk = lfirst_node(PathKey, lc2);
+
+				if (pk->pk_eclass->ec_sortref != sgc->tleSortGroupRef)
+					elog(ERROR, "Order of group-by clauses doesn't correspond incoming sort order");
+			}
 		}
 	}
 #endif
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 861656a192..4751a700ab 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6882,7 +6882,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -6963,7 +6963,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				/* process all potentially interesting grouping reorderings */
 				foreach(lc2, pathkey_orderings)
 				{
-					PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+					GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 					/* restore the path (we replace it in the loop) */
 					path = path_save;
@@ -7214,7 +7214,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 				/* restore the path (we replace it in the loop) */
 				path = path_save;
@@ -7270,7 +7270,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 			/* process all potentially interesting grouping reorderings */
 			foreach(lc2, pathkey_orderings)
 			{
-				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+				GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
 
 
 				/* restore the path (we replace it in the loop) */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 6c71098f2d..b3862500b6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -427,6 +427,10 @@ struct PlannerInfo
 	 * containing all the query's rows.  Hence, if you want to check whether
 	 * GROUP BY was specified, test for nonempty parse->groupClause, not for
 	 * nonempty processed_groupClause.
+	 * Optimiser chooses specific order of group-by clauses during the upper
+	 * paths generation process, attempting to use different strategies to
+	 * minimize number of sorts or engage incremental sort.
+	 * See get_useful_group_keys_orderings for details.
 	 *
 	 * Currently, when grouping sets are specified we do not attempt to
 	 * optimize the groupClause, so that processed_groupClause will be
@@ -1468,14 +1472,20 @@ typedef struct PathKey
 } PathKey;
 
 /*
- * Combines the information about pathkeys and the associated clauses.
+ * Contains an order of group-by clauses and corresponding list of pathkeys.
+ *
+ * Order of SortGroupClause elements must correspond the order in the head of
+ * PathKey list:
+ * tleSortGroupRef of N-th element in the clauses must be the same as the value
+ * of ec_sortref in N-th pathkey equivalence class.
  */
-typedef struct PathKeyInfo
+typedef struct GroupByOrdering
 {
 	NodeTag		type;
+
 	List	   *pathkeys;
 	List	   *clauses;
-} PathKeyInfo;
+} GroupByOrdering;
 
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d551ada325..33eff1e63a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4113,7 +4113,7 @@ manifest_writer
 rfile
 ws_options
 ws_file_info
-PathKeyInfo
+GroupByOrdering
 TidStore
 TidStoreIter
 TidStoreIterResult

Reply via email to