This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit e5d742b80606d5ba711a4ce650bfda37b764ec7c Author: Wenru Yan <[email protected]> AuthorDate: Tue Aug 16 04:26:36 2022 -0700 Preserve param info when bringing a path to OuterQuery locus When we bring a path to OuterQuery locus, we need to keep its param info, because this path may further join with other paths. For instance, select * from a where a.i in (select count(b.j) from b, c, lateral (select * from d where d.j = c.j limit 10) s where s.i = a.i ); The path for subquery 's' requires parameter from 'c'. When we bring this path to OuterQuery locus, its param info needs to be preserved, so that when joining 's' with 'b' we can have correct param info. --- src/backend/optimizer/path/allpaths.c | 2 +- src/backend/optimizer/plan/createplan.c | 14 +++--- src/test/regress/expected/subselect_gp.out | 54 ++++++++++++++++++++++ .../regress/expected/subselect_gp_optimizer.out | 54 ++++++++++++++++++++++ src/test/regress/sql/subselect_gp.sql | 24 ++++++++++ 5 files changed, 140 insertions(+), 8 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c0c3491ac4..a3d0064bae 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -573,7 +573,7 @@ bring_to_outer_query(PlannerInfo *root, RelOptInfo *rel, List *outer_quals) path, path->parent->reltarget, outer_quals, - false); + true); add_path(rel, path, root); } set_cheapest(rel); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 396fc318fc..3d60e33f94 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -2273,13 +2273,6 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) { List *all_clauses = best_path->cdb_restrict_clauses; - /* Replace any outer-relation variables with nestloop params */ - if (best_path->path.param_info) - { - all_clauses = (List *) - replace_nestloop_params(root, (Node *) all_clauses); - } - /* Sort clauses into best execution order */ all_clauses = order_qual_clauses(root, all_clauses); @@ -2288,6 +2281,13 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, int flags) /* but we actually also want the pseudoconstants */ pseudoconstants = extract_actual_clauses(all_clauses, true); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->path.param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } } /* We need a Result node */ diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index 7c7bedb786..966335ab41 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -2982,3 +2982,57 @@ select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issu 1 | 10002 (1 row) +--- +--- Test param info is preserved when bringing a path to OuterQuery locus +--- +drop table if exists param_t; +NOTICE: table "param_t" does not exist, skipping +create table param_t (i int, j int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into param_t select i, i from generate_series(1,10)i; +analyze param_t; +explain (costs off) +select * from param_t a where a.i in + (select count(b.j) from param_t b, param_t c, + lateral (select * from param_t d where d.j = c.j limit 10) s + where s.i = a.i + ); + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on param_t a + Filter: (SubPlan 1) + SubPlan 1 + -> Aggregate + -> Nested Loop + -> Nested Loop + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on param_t c + -> Materialize + -> Result + Filter: (d.i = a.i) + -> Limit + -> Result + Filter: (d.j = c.j) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on param_t d + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on param_t b + Optimizer: Postgres query optimizer +(23 rows) + +select * from param_t a where a.i in + (select count(b.j) from param_t b, param_t c, + lateral (select * from param_t d where d.j = c.j limit 10) s + where s.i = a.i + ); + i | j +----+---- + 10 | 10 +(1 row) + +drop table if exists param_t; diff --git a/src/test/regress/expected/subselect_gp_optimizer.out b/src/test/regress/expected/subselect_gp_optimizer.out index 2af82e2cae..7a4e4628c1 100644 --- a/src/test/regress/expected/subselect_gp_optimizer.out +++ b/src/test/regress/expected/subselect_gp_optimizer.out @@ -3063,3 +3063,57 @@ select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issu 1 | 10002 (1 row) +--- +--- Test param info is preserved when bringing a path to OuterQuery locus +--- +drop table if exists param_t; +NOTICE: table "param_t" does not exist, skipping +create table param_t (i int, j int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into param_t select i, i from generate_series(1,10)i; +analyze param_t; +explain (costs off) +select * from param_t a where a.i in + (select count(b.j) from param_t b, param_t c, + lateral (select * from param_t d where d.j = c.j limit 10) s + where s.i = a.i + ); + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on param_t a + Filter: (SubPlan 1) + SubPlan 1 + -> Aggregate + -> Nested Loop + -> Nested Loop + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on param_t c + -> Materialize + -> Result + Filter: (d.i = a.i) + -> Limit + -> Result + Filter: (d.j = c.j) + -> Materialize + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on param_t d + -> Materialize + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on param_t b + Optimizer: Postgres query optimizer +(23 rows) + +select * from param_t a where a.i in + (select count(b.j) from param_t b, param_t c, + lateral (select * from param_t d where d.j = c.j limit 10) s + where s.i = a.i + ); + i | j +----+---- + 10 | 10 +(1 row) + +drop table if exists param_t; diff --git a/src/test/regress/sql/subselect_gp.sql b/src/test/regress/sql/subselect_gp.sql index 8e36f03bf9..5391e4747c 100644 --- a/src/test/regress/sql/subselect_gp.sql +++ b/src/test/regress/sql/subselect_gp.sql @@ -1190,3 +1190,27 @@ explain (costs off, verbose) select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j desc); select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j desc); + +--- +--- Test param info is preserved when bringing a path to OuterQuery locus +--- +drop table if exists param_t; + +create table param_t (i int, j int); +insert into param_t select i, i from generate_series(1,10)i; +analyze param_t; + +explain (costs off) +select * from param_t a where a.i in + (select count(b.j) from param_t b, param_t c, + lateral (select * from param_t d where d.j = c.j limit 10) s + where s.i = a.i + ); +select * from param_t a where a.i in + (select count(b.j) from param_t b, param_t c, + lateral (select * from param_t d where d.j = c.j limit 10) s + where s.i = a.i + ); + + +drop table if exists param_t; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
