From a944899866fdf001475a819fd5e8f3e94c4aeb4c Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Thu, 2 May 2024 13:07:21 +0000
Subject: [PATCH v1] Fix bogus lateral dependency after self-join removal

---
 src/backend/optimizer/plan/analyzejoins.c | 28 +++++++++++++++-----
 src/test/regress/expected/join.out        | 32 +++++++++++++++++++++++
 src/test/regress/sql/join.sql             | 16 ++++++++++++
 3 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 506fccd20c..76f1dffb43 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -457,15 +457,25 @@ remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
 			phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, relid, subst);
 			phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, ojrelid, subst);
 			Assert(!bms_is_empty(phinfo->ph_eval_at));	/* checked previously */
+
 			phinfo->ph_needed = replace_relid(phinfo->ph_needed, relid, subst);
 			phinfo->ph_needed = replace_relid(phinfo->ph_needed, ojrelid, subst);
 			/* ph_needed might or might not become empty */
+
 			phinfo->ph_lateral = replace_relid(phinfo->ph_lateral, relid, subst);
+			/*
+			 * ph_lateral might contain rels mentioned in ph_eval_at after the
+			 * replacement, remove them.
+			 */
+			phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
 			/* ph_lateral might or might not be empty */
+
 			phv->phrels = replace_relid(phv->phrels, relid, subst);
 			phv->phrels = replace_relid(phv->phrels, ojrelid, subst);
 			Assert(!bms_is_empty(phv->phrels));
+
 			replace_varno((Node *) phv->phexpr, relid, subst);
+
 			Assert(phv->phnullingrels == NULL); /* no need to adjust */
 		}
 	}
@@ -2292,25 +2302,29 @@ remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
 
 		if (IsA(jlnode, RangeTblRef))
 		{
-			RangeTblRef *ref = (RangeTblRef *) jlnode;
-			RangeTblEntry *rte = root->simple_rte_array[ref->rtindex];
+			int			varno = ((RangeTblRef *) jlnode)->rtindex;
+			RangeTblEntry *rte = root->simple_rte_array[varno];
 
 			/*
-			 * We only care about base relations from which we select
-			 * something.
+			 * We only consider ordinary relations as candidates to be removed,
+			 * and these relations should not have TABLESAMPLE clauses
+			 * specified.  Removing a relation with TABLESAMPLE clause could
+			 * potentially change the syntax of the query.
 			 */
 			if (rte->rtekind == RTE_RELATION &&
 				rte->relkind == RELKIND_RELATION &&
-				root->simple_rel_array[ref->rtindex] != NULL)
+				rte->tablesample == NULL)
 			{
-				Assert(!bms_is_member(ref->rtindex, relids));
-				relids = bms_add_member(relids, ref->rtindex);
+				Assert(!bms_is_member(varno, relids));
+				relids = bms_add_member(relids, varno);
 			}
 		}
 		else if (IsA(jlnode, List))
+		{
 			/* Recursively go inside the sub-joinlist */
 			toRemove = remove_self_joins_recurse(root, (List *) jlnode,
 												 toRemove);
+		}
 		else
 			elog(ERROR, "unrecognized joinlist node type: %d",
 				 (int) nodeTag(jlnode));
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 8b640c2fc2..24ad31a3ee 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6215,6 +6215,38 @@ select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
                Filter: (b IS NOT NULL)
 (6 rows)
 
+-- Ensure that relations with TABLESAMPLE clauses are not considered as
+-- candidates to be removed
+explain (costs off)
+select * from sj t1
+    join lateral
+      (select * from sj tablesample system(t1.b)) s
+    on t1.a = s.a;
+              QUERY PLAN               
+---------------------------------------
+ Nested Loop
+   ->  Seq Scan on sj t1
+   ->  Memoize
+         Cache Key: t1.a, t1.b
+         Cache Mode: binary
+         ->  Sample Scan on sj
+               Sampling: system (t1.b)
+               Filter: (t1.a = a)
+(8 rows)
+
+-- Ensure that SJE does not form a self-referential lateral dependency
+explain (costs off)
+select * from sj t1
+    left join lateral
+      (select t1.a as t1a, * from sj t2) s
+    on true
+where t1.a = s.a;
+        QUERY PLAN         
+---------------------------
+ Seq Scan on sj t2
+   Filter: (a IS NOT NULL)
+(2 rows)
+
 -- Degenerated case.
 explain (costs off)
 select * from
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c4c6c7b8ba..132a721139 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2352,6 +2352,22 @@ where exists (select * from sj q
 explain (costs off)
 select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
 
+-- Ensure that relations with TABLESAMPLE clauses are not considered as
+-- candidates to be removed
+explain (costs off)
+select * from sj t1
+    join lateral
+      (select * from sj tablesample system(t1.b)) s
+    on t1.a = s.a;
+
+-- Ensure that SJE does not form a self-referential lateral dependency
+explain (costs off)
+select * from sj t1
+    left join lateral
+      (select t1.a as t1a, * from sj t2) s
+    on true
+where t1.a = s.a;
+
 -- Degenerated case.
 explain (costs off)
 select * from
-- 
2.34.1

