(cloudberry) 21/24: Preserve param info when bringing a path to OuterQuery locus

yjhjstz Wed, 25 Dec 2024 15:39:00 -0800

This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit e5d742b80606d5ba711a4ce650bfda37b764ec7c
Author: Wenru Yan <[email protected]>
AuthorDate: Tue Aug 16 04:26:36 2022 -0700

    Preserve param info when bringing a path to OuterQuery locus
    
    When we bring a path to OuterQuery locus, we need to keep its param info,
    because this path may further join with other paths. For instance,
    
            select * from a where a.i in
                    (select count(b.j) from b, c,
                            lateral (select * from d where d.j = c.j limit 10) s
                    where s.i = a.i
                    );
    
    The path for subquery 's' requires parameter from 'c'. When we bring this 
path
    to OuterQuery locus, its param info needs to be preserved, so that when 
joining
    's' with 'b' we can have correct param info.
---
 src/backend/optimizer/path/allpaths.c              |  2 +-
 src/backend/optimizer/plan/createplan.c            | 14 +++---
 src/test/regress/expected/subselect_gp.out         | 54 ++++++++++++++++++++++
 .../regress/expected/subselect_gp_optimizer.out    | 54 ++++++++++++++++++++++
 src/test/regress/sql/subselect_gp.sql              | 24 ++++++++++
 5 files changed, 140 insertions(+), 8 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c 
b/src/backend/optimizer/path/allpaths.c
index c0c3491ac4..a3d0064bae 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -573,7 +573,7 @@ bring_to_outer_query(PlannerInfo *root, RelOptInfo *rel, 
List *outer_quals)
                                                                                
                                          path,
                                                                                
                                          path->parent->reltarget,
                                                                                
                                          outer_quals,
-                                                                               
                                          false);
+                                                                               
                                          true);
                add_path(rel, path, root);
        }
        set_cheapest(rel);
diff --git a/src/backend/optimizer/plan/createplan.c 
b/src/backend/optimizer/plan/createplan.c
index 396fc318fc..3d60e33f94 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -2273,13 +2273,6 @@ create_projection_plan(PlannerInfo *root, ProjectionPath 
*best_path, int flags)
                {
                        List       *all_clauses = 
best_path->cdb_restrict_clauses;
 
-                       /* Replace any outer-relation variables with nestloop 
params */
-                       if (best_path->path.param_info)
-                       {
-                               all_clauses = (List *)
-                                       replace_nestloop_params(root, (Node *) 
all_clauses);
-                       }
-
                        /* Sort clauses into best execution order */
                        all_clauses = order_qual_clauses(root, all_clauses);
 
@@ -2288,6 +2281,13 @@ create_projection_plan(PlannerInfo *root, ProjectionPath 
*best_path, int flags)
 
                        /* but we actually also want the pseudoconstants */
                        pseudoconstants = extract_actual_clauses(all_clauses, 
true);
+
+                       /* Replace any outer-relation variables with nestloop 
params */
+                       if (best_path->path.param_info)
+                       {
+                               scan_clauses = (List *)
+                                       replace_nestloop_params(root, (Node *) 
scan_clauses);
+                       }
                }
 
                /* We need a Result node */
diff --git a/src/test/regress/expected/subselect_gp.out 
b/src/test/regress/expected/subselect_gp.out
index 7c7bedb786..966335ab41 100644
--- a/src/test/regress/expected/subselect_gp.out
+++ b/src/test/regress/expected/subselect_gp.out
@@ -2982,3 +2982,57 @@ select * from issue_12656 where (i, j) in (select 
distinct on (i) i, j from issu
  1 | 10002
 (1 row)
 
+---
+--- Test param info is preserved when bringing a path to OuterQuery locus
+---
+drop table if exists param_t;
+NOTICE:  table "param_t" does not exist, skipping
+create table param_t (i int, j int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into param_t select i, i from generate_series(1,10)i;
+analyze param_t;
+explain (costs off)
+select * from param_t a where a.i in
+       (select count(b.j) from param_t b, param_t c,
+               lateral (select * from param_t d where d.j = c.j limit 10) s
+       where s.i = a.i
+       );
+                                                QUERY PLAN                     
                            
+-----------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on param_t a
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Aggregate
+                 ->  Nested Loop
+                       ->  Nested Loop
+                             ->  Materialize
+                                   ->  Broadcast Motion 3:3  (slice2; 
segments: 3)
+                                         ->  Seq Scan on param_t c
+                             ->  Materialize
+                                   ->  Result
+                                         Filter: (d.i = a.i)
+                                         ->  Limit
+                                               ->  Result
+                                                     Filter: (d.j = c.j)
+                                                     ->  Materialize
+                                                           ->  Broadcast 
Motion 3:3  (slice3; segments: 3)
+                                                                 ->  Seq Scan 
on param_t d
+                       ->  Materialize
+                             ->  Broadcast Motion 3:3  (slice4; segments: 3)
+                                   ->  Seq Scan on param_t b
+ Optimizer: Postgres query optimizer
+(23 rows)
+
+select * from param_t a where a.i in
+       (select count(b.j) from param_t b, param_t c,
+               lateral (select * from param_t d where d.j = c.j limit 10) s
+       where s.i = a.i
+       );
+ i  | j  
+----+----
+ 10 | 10
+(1 row)
+
+drop table if exists param_t;
diff --git a/src/test/regress/expected/subselect_gp_optimizer.out 
b/src/test/regress/expected/subselect_gp_optimizer.out
index 2af82e2cae..7a4e4628c1 100644
--- a/src/test/regress/expected/subselect_gp_optimizer.out
+++ b/src/test/regress/expected/subselect_gp_optimizer.out
@@ -3063,3 +3063,57 @@ select * from issue_12656 where (i, j) in (select 
distinct on (i) i, j from issu
  1 | 10002
 (1 row)
 
+---
+--- Test param info is preserved when bringing a path to OuterQuery locus
+---
+drop table if exists param_t;
+NOTICE:  table "param_t" does not exist, skipping
+create table param_t (i int, j int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into param_t select i, i from generate_series(1,10)i;
+analyze param_t;
+explain (costs off)
+select * from param_t a where a.i in
+       (select count(b.j) from param_t b, param_t c,
+               lateral (select * from param_t d where d.j = c.j limit 10) s
+       where s.i = a.i
+       );
+                                                QUERY PLAN                     
                            
+-----------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on param_t a
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Aggregate
+                 ->  Nested Loop
+                       ->  Nested Loop
+                             ->  Materialize
+                                   ->  Broadcast Motion 3:3  (slice2; 
segments: 3)
+                                         ->  Seq Scan on param_t c
+                             ->  Materialize
+                                   ->  Result
+                                         Filter: (d.i = a.i)
+                                         ->  Limit
+                                               ->  Result
+                                                     Filter: (d.j = c.j)
+                                                     ->  Materialize
+                                                           ->  Broadcast 
Motion 3:3  (slice3; segments: 3)
+                                                                 ->  Seq Scan 
on param_t d
+                       ->  Materialize
+                             ->  Broadcast Motion 3:3  (slice4; segments: 3)
+                                   ->  Seq Scan on param_t b
+ Optimizer: Postgres query optimizer
+(23 rows)
+
+select * from param_t a where a.i in
+       (select count(b.j) from param_t b, param_t c,
+               lateral (select * from param_t d where d.j = c.j limit 10) s
+       where s.i = a.i
+       );
+ i  | j  
+----+----
+ 10 | 10
+(1 row)
+
+drop table if exists param_t;
diff --git a/src/test/regress/sql/subselect_gp.sql 
b/src/test/regress/sql/subselect_gp.sql
index 8e36f03bf9..5391e4747c 100644
--- a/src/test/regress/sql/subselect_gp.sql
+++ b/src/test/regress/sql/subselect_gp.sql
@@ -1190,3 +1190,27 @@ explain (costs off, verbose)
 select * from issue_12656 where (i, j) in (select distinct on (i) i, j from 
issue_12656 order by i, j desc);
 
 select * from issue_12656 where (i, j) in (select distinct on (i) i, j from 
issue_12656 order by i, j desc);
+
+---
+--- Test param info is preserved when bringing a path to OuterQuery locus
+---
+drop table if exists param_t;
+
+create table param_t (i int, j int);
+insert into param_t select i, i from generate_series(1,10)i;
+analyze param_t;
+
+explain (costs off)
+select * from param_t a where a.i in
+       (select count(b.j) from param_t b, param_t c,
+               lateral (select * from param_t d where d.j = c.j limit 10) s
+       where s.i = a.i
+       );
+select * from param_t a where a.i in
+       (select count(b.j) from param_t b, param_t c,
+               lateral (select * from param_t d where d.j = c.j limit 10) s
+       where s.i = a.i
+       );
+
+
+drop table if exists param_t;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 21/24: Preserve param info when bringing a path to OuterQuery locus

Reply via email to