From 4ba98ddafde4269a2e2897ac0167d82a05067f56 Mon Sep 17 00:00:00 2001
From: Andy Fan <zhihui.fan1213@gmail.com>
Date: Fri, 13 Oct 2023 14:39:34 +0800
Subject: [PATCH v5] Pull up ANY-SUBLINK with the necessary lateral support.

For ANY-SUBLINK, we adopted a two-stage pull-up approach to handle
different types of scenarios. In the first stage, the sublink is pulled up
as a subquery. Because of this, when writing this code, we did not have
the ability to perform lateral joins, and therefore, we were unable to
pull up Var with varlevelsup=1. Now that we have the ability to use
lateral joins, we can eliminate this limitation.

Author: Andy Fan <zhihui.fan1213@gmail.com>

Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Richard Guo <guofenglinux@gmail.com>
Reviewed-by: Alena Rybakina <lena.ribackina@yandex.ru>
Reviewed-by: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
---
 .../postgres_fdw/expected/postgres_fdw.out    |   6 +-
 contrib/postgres_fdw/sql/postgres_fdw.sql     |   4 +-
 src/backend/optimizer/plan/subselect.c        |  15 ++-
 src/test/regress/expected/join.out            |  14 +--
 src/test/regress/expected/subselect.out       | 102 ++++++++++++++++++
 src/test/regress/sql/join.sql                 |   8 +-
 src/test/regress/sql/subselect.sql            |  32 ++++++
 7 files changed, 161 insertions(+), 20 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 144c114d0fe..932a18624a8 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -11633,7 +11633,7 @@ CREATE FOREIGN TABLE foreign_tbl (b int)
 CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
   SERVER loopback OPTIONS (table_name 'base_tbl');
 EXPLAIN (VERBOSE, COSTS OFF)
-SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
+SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
                                  QUERY PLAN                                  
 -----------------------------------------------------------------------------
  Seq Scan on public.base_tbl
@@ -11641,7 +11641,7 @@ SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
    Filter: (SubPlan 1)
    SubPlan 1
      ->  Result
-           Output: base_tbl.a
+           Output: base_tbl.a, (random() > '0'::double precision)
            ->  Append
                  ->  Async Foreign Scan on public.foreign_tbl foreign_tbl_1
                        Remote SQL: SELECT NULL FROM public.base_tbl
@@ -11649,7 +11649,7 @@ SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
                        Remote SQL: SELECT NULL FROM public.base_tbl
 (11 rows)
 
-SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
+SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
  a 
 ---
  1
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index a303bfb322d..033b5b815da 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3854,8 +3854,8 @@ CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
   SERVER loopback OPTIONS (table_name 'base_tbl');
 
 EXPLAIN (VERBOSE, COSTS OFF)
-SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
-SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
+SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
+SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
 
 -- Clean up
 DROP FOREIGN TABLE foreign_tbl CASCADE;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 7a9fe88fec3..f46ec3d1826 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1278,14 +1278,21 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	List	   *subquery_vars;
 	Node	   *quals;
 	ParseState *pstate;
+	Relids sub_ref_outer_relids = NULL;
+	bool use_lateral = false;
 
 	Assert(sublink->subLinkType == ANY_SUBLINK);
 
 	/*
-	 * The sub-select must not refer to any Vars of the parent query. (Vars of
-	 * higher levels should be okay, though.)
+	 * If the sub-select refers to any Vars of the parent query, we so let's
+	 * considering it as LATERAL.  (Vars of higher levels don't matter here.)
 	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
+	sub_ref_outer_relids = pull_varnos_of_level(NULL, (Node *) subselect, 1);
+
+	use_lateral = !bms_is_empty(sub_ref_outer_relids) &&
+						 bms_is_subset(sub_ref_outer_relids, available_rels);
+
+	if (!use_lateral && !bms_is_empty(sub_ref_outer_relids))
 		return NULL;
 
 	/*
@@ -1323,7 +1330,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	nsitem = addRangeTableEntryForSubquery(pstate,
 										   subselect,
 										   makeAlias("ANY_subquery", NIL),
-										   false,
+										   use_lateral,
 										   false);
 	rte = nsitem->p_rte;
 	parse->rtable = lappend(parse->rtable, rte);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index b95d30f6586..156aca128ab 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5251,7 +5251,7 @@ reset enable_nestloop;
 explain (costs off)
 select a.unique1, b.unique2
   from onek a left join onek b on a.unique1 = b.unique2
-  where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
+  where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
                         QUERY PLAN                        
 ----------------------------------------------------------
  Hash Join
@@ -5267,7 +5267,7 @@ select a.unique1, b.unique2
 
 select a.unique1, b.unique2
   from onek a left join onek b on a.unique1 = b.unique2
-  where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
+  where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
  unique1 | unique2 
 ---------+---------
      123 |     123
@@ -7173,12 +7173,12 @@ select * from (values (0), (1)) v(id),
 lateral (select * from int8_tbl t1,
          lateral (select * from
                     (select * from int8_tbl t2
-                     where q1 = any (select q2 from int8_tbl t3
+                     where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
                                      where q2 = (select greatest(t1.q1,t2.q2))
                                        and (select v.id=0)) offset 0) ss2) ss
          where t1.q1 = ss.q2) ss0;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
  Nested Loop
    Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
    ->  Seq Scan on public.int8_tbl t1
@@ -7195,7 +7195,7 @@ lateral (select * from int8_tbl t1,
                      Filter: (SubPlan 3)
                      SubPlan 3
                        ->  Result
-                             Output: t3.q2
+                             Output: t3.q2, (random() > '0'::double precision)
                              One-Time Filter: $4
                              InitPlan 1 (returns $2)
                                ->  Result
@@ -7212,7 +7212,7 @@ select * from (values (0), (1)) v(id),
 lateral (select * from int8_tbl t1,
          lateral (select * from
                     (select * from int8_tbl t2
-                     where q1 = any (select q2 from int8_tbl t3
+                     where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
                                      where q2 = (select greatest(t1.q1,t2.q2))
                                        and (select v.id=0)) offset 0) ss2) ss
          where t1.q1 = ss.q2) ss0;
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index a3a4d03d104..17df6b5dc9c 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1926,3 +1926,105 @@ select * from x for update;
    Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
 (2 rows)
 
+-- Pull-up the direct-correlated ANY_SUBLINK
+explain (costs off)
+select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.odd = b.odd) AND (a.hundred = b.hundred))
+   ->  Seq Scan on tenk1 a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.odd, b.hundred
+               ->  Seq Scan on tenk2 b
+(7 rows)
+
+explain (costs off)
+select * from tenk1 A where exists
+(select 1 from tenk2 B
+where A.hundred in (select C.hundred FROM tenk2 C
+WHERE c.odd = b.odd));
+           QUERY PLAN            
+---------------------------------
+ Nested Loop Semi Join
+   Join Filter: (SubPlan 1)
+   ->  Seq Scan on tenk1 a
+   ->  Materialize
+         ->  Seq Scan on tenk2 b
+   SubPlan 1
+     ->  Seq Scan on tenk2 c
+           Filter: (odd = b.odd)
+(8 rows)
+
+-- we should only try to pull up the sublink into RHS of a left join
+-- but a.hundred is not avaiable.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+           QUERY PLAN            
+---------------------------------
+ Nested Loop Left Join
+   Join Filter: (SubPlan 1)
+   ->  Seq Scan on tenk1 a
+   ->  Materialize
+         ->  Seq Scan on tenk2 b
+   SubPlan 1
+     ->  Seq Scan on tenk2 c
+           Filter: (odd = b.odd)
+(8 rows)
+
+-- we should only try to pull up the sublink into RHS of a left join
+-- but a.odd is not avaiable for this.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
+           QUERY PLAN            
+---------------------------------
+ Nested Loop Left Join
+   Join Filter: (SubPlan 1)
+   ->  Seq Scan on tenk1 a
+   ->  Materialize
+         ->  Seq Scan on tenk2 b
+   SubPlan 1
+     ->  Seq Scan on tenk2 c
+           Filter: (odd = a.odd)
+(8 rows)
+
+-- should be able to pull up since all the references is available
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Nested Loop Left Join
+   ->  Seq Scan on tenk1 a
+   ->  Materialize
+         ->  Hash Join
+               Hash Cond: ((b.odd = c.odd) AND (b.hundred = c.hundred))
+               ->  Seq Scan on tenk2 b
+               ->  Hash
+                     ->  HashAggregate
+                           Group Key: c.odd, c.hundred
+                           ->  Seq Scan on tenk2 c
+(10 rows)
+
+-- we can pull up the sublink into the inner JoinExpr.
+explain (costs off)
+SELECT * FROM tenk1 A INNER JOIN tenk2 B
+ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Join
+   Hash Cond: (c.odd = b.odd)
+   ->  Hash Join
+         Hash Cond: (a.hundred = c.hundred)
+         ->  Seq Scan on tenk1 a
+         ->  Hash
+               ->  HashAggregate
+                     Group Key: c.odd, c.hundred
+                     ->  Seq Scan on tenk2 c
+   ->  Hash
+         ->  Seq Scan on tenk2 b
+(11 rows)
+
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 3e5032b04dd..2121caf7e3e 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1852,11 +1852,11 @@ reset enable_nestloop;
 explain (costs off)
 select a.unique1, b.unique2
   from onek a left join onek b on a.unique1 = b.unique2
-  where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
+  where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
 
 select a.unique1, b.unique2
   from onek a left join onek b on a.unique1 = b.unique2
-  where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
+  where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
 
 --
 -- test full-join strength reduction
@@ -2578,7 +2578,7 @@ select * from (values (0), (1)) v(id),
 lateral (select * from int8_tbl t1,
          lateral (select * from
                     (select * from int8_tbl t2
-                     where q1 = any (select q2 from int8_tbl t3
+                     where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
                                      where q2 = (select greatest(t1.q1,t2.q2))
                                        and (select v.id=0)) offset 0) ss2) ss
          where t1.q1 = ss.q2) ss0;
@@ -2587,7 +2587,7 @@ select * from (values (0), (1)) v(id),
 lateral (select * from int8_tbl t1,
          lateral (select * from
                     (select * from int8_tbl t2
-                     where q1 = any (select q2 from int8_tbl t3
+                     where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
                                      where q2 = (select greatest(t1.q1,t2.q2))
                                        and (select v.id=0)) offset 0) ss2) ss
          where t1.q1 = ss.q2) ss0;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index bd2e39efd48..49ce5fc99a8 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -968,3 +968,35 @@ select * from (with x as (select 2 as y) select * from x) ss;
 explain (verbose, costs off)
 with x as (select * from subselect_tbl)
 select * from x for update;
+
+-- Pull-up the direct-correlated ANY_SUBLINK
+explain (costs off)
+select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
+
+explain (costs off)
+select * from tenk1 A where exists
+(select 1 from tenk2 B
+where A.hundred in (select C.hundred FROM tenk2 C
+WHERE c.odd = b.odd));
+
+-- we should only try to pull up the sublink into RHS of a left join
+-- but a.hundred is not avaiable.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+
+-- we should only try to pull up the sublink into RHS of a left join
+-- but a.odd is not avaiable for this.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
+
+-- should be able to pull up since all the references is available
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+
+-- we can pull up the sublink into the inner JoinExpr.
+explain (costs off)
+SELECT * FROM tenk1 A INNER JOIN tenk2 B
+ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
-- 
2.21.0

