This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 546d7b8a9df7e0eac484236f020641d795d3e787 Author: Divyesh Vanjare <vanja...@vmware.com> AuthorDate: Wed Aug 30 14:36:55 2023 -0400 Fix recursive CTE mergejoin having motion on WTS For a recursive CTE, planner currently creates a mergejoin plan which moves the WorkTableScan results to a singleQE using CdbPathLocus_MakeSingleQE That would introduce motion in between Union and WorkTableScan. The WorkTableScan results should be local to each segment and shouldn't be broadcasted/moved to other segments. We shouldn't be allowing any motion in between Union and WorkTableScan. This commit adds a check for the last resort case to avoid adding that motion. If there is a WTS on any side in the last resort case, we should just fail (no possible plan using mergejoin) Added simplified test to force a mergejoin for such recursive CTE, the test shouldn't create a mergejoin plan. Co-authored-by: Zhenghua Lyu <z...@pivotal.io> Co-authored-by: Alexandra Wang<lew...@pivotal.io> --- src/backend/cdb/cdbpath.c | 9 ++- src/test/regress/expected/gp_recursive_cte.out | 81 ++++++++++++++++++++++++++ src/test/regress/sql/gp_recursive_cte.sql | 45 ++++++++++++++ 3 files changed, 132 insertions(+), 3 deletions(-) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 9eb95e087c..2d90f63ad9 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -2231,15 +2231,18 @@ cdbpath_motion_for_join(PlannerInfo *root, CdbPathLocus_MakeReplicated(&large_rel->move_to, CdbPathLocus_NumSegments(small_rel->locus), 0); - /* Last resort: Move both rels to a single qExec. */ - else + /* Last resort: Move both rels to a single qExec + * only if there is no wts on either rels*/ + else if (!outer.has_wts && !inner.has_wts) { int numsegments = CdbPathLocus_CommonSegments(outer.locus, inner.locus); CdbPathLocus_MakeSingleQE(&outer.move_to, numsegments); CdbPathLocus_MakeSingleQE(&inner.move_to, numsegments); } - } /* partitioned */ + else + goto fail; + } /* partitioned */ /* * Move outer. diff --git a/src/test/regress/expected/gp_recursive_cte.out b/src/test/regress/expected/gp_recursive_cte.out index e0f47dac81..389f888e6b 100644 --- a/src/test/regress/expected/gp_recursive_cte.out +++ b/src/test/regress/expected/gp_recursive_cte.out @@ -797,3 +797,84 @@ select * from cte; 10 (10 rows) +-- Test recursive CTE when the non-recursive term is a table scan with a +-- predicate on the distribution key, and the recursive term joins the CTE with +-- the same table on its non-distribution key +create table recursive_table_6(a numeric(4), b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into recursive_table_6 values (0::numeric, 3); +insert into recursive_table_6 values (2::numeric, 0); +insert into recursive_table_6 values (5::numeric, 0); +analyze recursive_table_6; +SELECT $query$ +WITH RECURSIVE cte (i, j) AS ( + SELECT a, b FROM recursive_table_6 WHERE a = 0::numeric::numeric + UNION ALL + SELECT a, b FROM recursive_table_6, cte WHERE cte.i = recursive_table_6.b +) +SELECT i, j FROM cte; +$query$ AS qry \gset +EXPLAIN (COSTS OFF) + :qry ; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Recursive Union + -> Seq Scan on recursive_table_6 + Filter: (a = '0'::numeric) + -> Hash Join + Hash Cond: (cte.i = (recursive_table_6_1.b)::numeric) + -> WorkTable Scan on cte + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on recursive_table_6 recursive_table_6_1 + Optimizer: Postgres query optimizer +(11 rows) + +:qry ; + i | j +---+--- + 0 | 3 + 2 | 0 + 5 | 0 +(3 rows) + +-- Test recursive CTE doesnt create a plan with motion on top of worktablescan +CREATE TABLE t1 (a int, b int) DISTRIBUTED BY (a); +SET enable_nestloop = off; +SET enable_hashjoin = off; +SET enable_mergejoin = on; +explain (costs off) with recursive rcte as + ( + ( select a, b, 1::integer recursion_level from t1 order by 1 ) + union all + select parent_table.a, parent_table.b, rcte.recursion_level + 1 + from + ( select a, b from t1 order by 1 ) parent_table + join rcte on rcte.b = parent_table.a + ) +select count(*) from rcte; + QUERY PLAN +--------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Recursive Union + -> Sort + Sort Key: t1.a + -> Seq Scan on t1 + -> Nested Loop + Join Filter: (t1_1.a = rcte.b) + -> WorkTable Scan on rcte + -> Materialize + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Sort + Sort Key: t1_1.a + -> Seq Scan on t1 t1_1 + Optimizer: Postgres query optimizer +(16 rows) + +RESET enable_nestloop; +RESET enable_hashjoin; +RESET enable_mergejoin; diff --git a/src/test/regress/sql/gp_recursive_cte.sql b/src/test/regress/sql/gp_recursive_cte.sql index 90aeda8814..549a29fc86 100644 --- a/src/test/regress/sql/gp_recursive_cte.sql +++ b/src/test/regress/sql/gp_recursive_cte.sql @@ -518,3 +518,48 @@ with recursive cte (n) as ) q ) select * from cte; + +-- Test recursive CTE when the non-recursive term is a table scan with a +-- predicate on the distribution key, and the recursive term joins the CTE with +-- the same table on its non-distribution key +create table recursive_table_6(a numeric(4), b int); +insert into recursive_table_6 values (0::numeric, 3); +insert into recursive_table_6 values (2::numeric, 0); +insert into recursive_table_6 values (5::numeric, 0); +analyze recursive_table_6; + +SELECT $query$ +WITH RECURSIVE cte (i, j) AS ( + SELECT a, b FROM recursive_table_6 WHERE a = 0::numeric::numeric + UNION ALL + SELECT a, b FROM recursive_table_6, cte WHERE cte.i = recursive_table_6.b +) +SELECT i, j FROM cte; +$query$ AS qry \gset + +EXPLAIN (COSTS OFF) + :qry ; + +:qry ; + +-- Test recursive CTE doesnt create a plan with motion on top of worktablescan +CREATE TABLE t1 (a int, b int) DISTRIBUTED BY (a); +SET enable_nestloop = off; +SET enable_hashjoin = off; +SET enable_mergejoin = on; + +explain (costs off) with recursive rcte as + ( + ( select a, b, 1::integer recursion_level from t1 order by 1 ) + union all + + select parent_table.a, parent_table.b, rcte.recursion_level + 1 + from + ( select a, b from t1 order by 1 ) parent_table + join rcte on rcte.b = parent_table.a + ) +select count(*) from rcte; + +RESET enable_nestloop; +RESET enable_hashjoin; +RESET enable_mergejoin; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org For additional commands, e-mail: commits-h...@cloudberry.apache.org