On 4/12/24 06:44, Tom Lane wrote:
* I'm pretty unconvinced by group_keys_reorder_by_pathkeys (which
I notice has already had one band-aid added to it since commit).
In particular, it seems to believe that the pathkeys and clauses
lists match one-for-one, but I seriously doubt that that invariant
remains guaranteed after the cleanup steps

     /* append the remaining group pathkeys (will be treated as not sorted) */
     *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
                                              *group_pathkeys);
     *group_clauses = list_concat_unique_ptr(new_group_clauses,
                                             *group_clauses);

For that to be reliable, the SortGroupClauses added to
new_group_clauses in the main loop have to be exactly those
that are associated with the same pathkeys in the old lists.
I doubt that that's necessarily true in the presence of redundant
grouping clauses.  (Maybe we can't get here with any redundant
grouping clauses, but still, we don't really guarantee uniqueness of
SortGroupClauses, much less that they are never copied which is what
you need if you want to believe that pointer equality is sufficient
for de-duping here.  PathKeys are explicitly made to be safe to compare
pointer-wise, but I know of no such guarantee for SortGroupClauses.)
I spent a lot of time inventing situations with SortGroupClause duplicates. Unfortunately, it looks impossible so far. But because we really don't guarantee uniqueness, I changed the code to survive in this case. Also, I added assertion checking to be sure we don't have logical mistakes here - see attachment. About the band-aid mentioned above - as I see, 4169850 introduces the same trick in planner.c. So, it looks like result of design of the current code.

--
regards,
Andrei Lepikhov
Postgres Professional
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 5d9597adcd..aa80f6486c 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -380,15 +380,18 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 
 	/*
 	 * We're going to search within just the first num_groupby_pathkeys of
-	 * *group_pathkeys.  The thing is that root->group_pathkeys is passed as
+	 * *group_pathkeys. The thing is that root->group_pathkeys is passed as
 	 * *group_pathkeys containing grouping pathkeys altogether with aggregate
-	 * pathkeys.  If we process aggregate pathkeys we could get an invalid
+	 * pathkeys. If we process aggregate pathkeys we could get an invalid
 	 * result of get_sortgroupref_clause_noerr(), because their
-	 * pathkey->pk_eclass->ec_sortref doesn't referece query targetlist.  So,
+	 * pathkey->pk_eclass->ec_sortref doesn't reference query targetlist. So,
 	 * we allocate a separate list of pathkeys for lookups.
 	 */
 	grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys);
 
+	/* Make a new copy before reordering clauses */
+	*group_clauses = list_copy(*group_clauses);
+
 	/*
 	 * Walk the pathkeys (determining ordering of the input path) and see if
 	 * there's a matching GROUP BY key. If we find one, we append it to the
@@ -400,8 +403,8 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 	 */
 	foreach(lc, pathkeys)
 	{
-		PathKey    *pathkey = (PathKey *) lfirst(lc);
-		SortGroupClause *sgc;
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
 
 		/*
 		 * Pathkeys are built in a way that allows simply comparing pointers.
@@ -431,17 +434,25 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 		Assert(OidIsValid(sgc->sortop));
 
 		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+
+		/*
+		 * Keeping in mind that the SortGroupClause list doesn't guarantee
+		 * unique pointers we must explicitly transfer elements one-by-one.
+		 */
 		new_group_clauses = lappend(new_group_clauses, sgc);
+		*group_clauses = list_delete_ptr(*group_clauses, sgc);
 	}
 
 	/* remember the number of pathkeys with a matching GROUP BY key */
 	n = list_length(new_group_pathkeys);
 
-	/* append the remaining group pathkeys (will be treated as not sorted) */
+	/*
+	 * Append the remaining group pathkeys (will be treated as not sorted) and
+	 * grouping clauses.
+	 */
 	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
 											 *group_pathkeys);
-	*group_clauses = list_concat_unique_ptr(new_group_clauses,
-											*group_clauses);
+	*group_clauses = list_concat(new_group_clauses, *group_clauses);
 
 	list_free(grouping_pathkeys);
 	return n;
@@ -484,9 +495,9 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
-	Query	   *parse = root->parse;
-	List	   *infos = NIL;
-	PathKeyInfo *info;
+	Query		   *parse = root->parse;
+	List		   *infos = NIL;
+	PathKeyInfo	   *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
@@ -561,6 +572,23 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 		}
 	}
 
+#ifdef USE_ASSERT_CHECKING
+	{
+		PathKeyInfo	   *pinfo = linitial_node(PathKeyInfo, infos);
+		ListCell	   *lc;
+
+		/* Test consistency of info structures */
+		for_each_from(lc, infos, 1)
+		{
+			info = lfirst_node(PathKeyInfo, lc);
+
+			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
+			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
+			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
+			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+		}
+	}
+#endif
 	return infos;
 }
 

Reply via email to