From b93418886e7e322877805f94506490719ad842af Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Thu, 10 Oct 2024 18:14:52 +0900
Subject: [PATCH v1] Remove the RTE_GROUP RTE if we drop the groupClause

For an EXISTS subquery, the only thing that matters is whether it
returns zero or more than zero rows.  Therefore, we remove certain SQL
features that won't affect that, among them the GROUP BY clauses.

After we drop the groupClause, we'd better remove the RTE_GROUP RTE
and clear the hasGroupRTE flag, as they depend on the groupClause.
Failing to do so could result in a bogus RTE_GROUP entry in the parent
query, leading to an assertion failure on the hasGroupRTE flag.

Reported-by: David Rowley
Author: Richard Guo
Discussion: https://postgr.es/m/CAApHDvp2_yht8uPLyWO-kVGWZhYvx5zjGfSrg4fBQ9fsC13V0g@mail.gmail.com
---
 src/backend/optimizer/plan/subselect.c | 23 +++++++++++++++++++++++
 src/test/regress/expected/join.out     | 15 +++++++++++++++
 src/test/regress/sql/join.sql          |  8 ++++++++
 3 files changed, 46 insertions(+)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 6d003cc8e5..f7d2c17b29 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1539,6 +1539,8 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 static bool
 simplify_EXISTS_query(PlannerInfo *root, Query *query)
 {
+	ListCell   *lc;
+
 	/*
 	 * We don't try to simplify at all if the query uses set operations,
 	 * aggregates, grouping sets, SRFs, modifying CTEs, HAVING, OFFSET, or FOR
@@ -1607,6 +1609,27 @@ simplify_EXISTS_query(PlannerInfo *root, Query *query)
 	query->sortClause = NIL;
 	query->hasDistinctOn = false;
 
+	/*
+	 * Since we have thrown away the GROUP BY clauses, we'd better remove the
+	 * RTE_GROUP RTE and clear the hasGroupRTE flag.
+	 */
+	foreach(lc, query->rtable)
+	{
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
+
+		/*
+		 * Remove the RTE_GROUP RTE.  (Since we'll exit the foreach loop
+		 * immediately, we don't bother with foreach_delete_current.)
+		 */
+		if (rte->rtekind == RTE_GROUP)
+		{
+			Assert(query->hasGroupRTE);
+			query->rtable = list_delete_cell(query->rtable, lc);
+			break;
+		}
+	}
+	query->hasGroupRTE = false;
+
 	return true;
 }
 
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 12abd3a0e7..76d671094f 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3127,6 +3127,21 @@ where b.unique2 is null;
          ->  Index Only Scan using tenk1_unique2 on tenk1 b
 (5 rows)
 
+--
+-- regression test for bogus RTE_GROUP entries
+--
+explain (costs off)
+select a.* from tenk1 a
+where exists (select 1 from tenk1 b where a.unique1 = b.unique2 group by b.unique1);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (a.unique1 = b.unique2)
+   ->  Seq Scan on tenk1 a
+   ->  Hash
+         ->  Index Only Scan using tenk1_unique2 on tenk1 b
+(5 rows)
+
 --
 -- regression test for proper handling of outer joins within antijoins
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 0c65e5af4b..6feb576ac1 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -797,6 +797,14 @@ explain (costs off)
 select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2
 where b.unique2 is null;
 
+--
+-- regression test for bogus RTE_GROUP entries
+--
+
+explain (costs off)
+select a.* from tenk1 a
+where exists (select 1 from tenk1 b where a.unique1 = b.unique2 group by b.unique1);
+
 --
 -- regression test for proper handling of outer joins within antijoins
 --
-- 
2.43.0

