This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 546d7b8a9df7e0eac484236f020641d795d3e787
Author: Divyesh Vanjare <vanja...@vmware.com>
AuthorDate: Wed Aug 30 14:36:55 2023 -0400

    Fix recursive CTE mergejoin having motion on WTS
    
    For a recursive CTE, planner currently creates a mergejoin plan which
    moves the WorkTableScan results to a singleQE using
    CdbPathLocus_MakeSingleQE
    That would introduce motion in between Union and WorkTableScan. The
    WorkTableScan results should be local to each segment and shouldn't be
    broadcasted/moved to other segments. We shouldn't be allowing any motion
    in between Union and WorkTableScan. This commit adds a check for the
    last resort case to avoid adding that motion. If there is a WTS on any
    side in the last resort case, we should just fail (no possible plan
    using mergejoin)
    
    Added simplified test to force a mergejoin for such recursive CTE, the
    test shouldn't create a mergejoin plan.
    
    Co-authored-by: Zhenghua Lyu <z...@pivotal.io>
    Co-authored-by: Alexandra Wang<lew...@pivotal.io>
---
 src/backend/cdb/cdbpath.c                      |  9 ++-
 src/test/regress/expected/gp_recursive_cte.out | 81 ++++++++++++++++++++++++++
 src/test/regress/sql/gp_recursive_cte.sql      | 45 ++++++++++++++
 3 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c
index 9eb95e087c..2d90f63ad9 100644
--- a/src/backend/cdb/cdbpath.c
+++ b/src/backend/cdb/cdbpath.c
@@ -2231,15 +2231,18 @@ cdbpath_motion_for_join(PlannerInfo *root,
                        CdbPathLocus_MakeReplicated(&large_rel->move_to,
                                                                                
CdbPathLocus_NumSegments(small_rel->locus), 0);
 
-               /* Last resort: Move both rels to a single qExec. */
-               else
+               /* Last resort: Move both rels to a single qExec
+                * only if there is no wts on either rels*/
+               else if (!outer.has_wts && !inner.has_wts)
                {
                        int numsegments = 
CdbPathLocus_CommonSegments(outer.locus,
                                                                                
                                  inner.locus);
                        CdbPathLocus_MakeSingleQE(&outer.move_to, numsegments);
                        CdbPathLocus_MakeSingleQE(&inner.move_to, numsegments);
                }
-       } /* partitioned */
+               else
+                       goto fail;
+       }                                                       /* partitioned 
*/
 
        /*
         * Move outer.
diff --git a/src/test/regress/expected/gp_recursive_cte.out 
b/src/test/regress/expected/gp_recursive_cte.out
index e0f47dac81..389f888e6b 100644
--- a/src/test/regress/expected/gp_recursive_cte.out
+++ b/src/test/regress/expected/gp_recursive_cte.out
@@ -797,3 +797,84 @@ select * from cte;
  10
 (10 rows)
 
+-- Test recursive CTE when the non-recursive term is a table scan with a
+-- predicate on the distribution key, and the recursive term joins the CTE with
+-- the same table on its non-distribution key
+create table recursive_table_6(a numeric(4), b int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into recursive_table_6 values (0::numeric, 3);
+insert into recursive_table_6 values (2::numeric, 0);
+insert into recursive_table_6 values (5::numeric, 0);
+analyze recursive_table_6;
+SELECT $query$
+WITH RECURSIVE cte (i, j) AS (
+    SELECT a, b FROM recursive_table_6 WHERE a = 0::numeric::numeric
+    UNION ALL
+    SELECT a, b FROM recursive_table_6, cte WHERE cte.i = recursive_table_6.b
+)
+SELECT i, j FROM cte;
+$query$ AS qry \gset
+EXPLAIN (COSTS OFF)
+    :qry ;
+                                   QUERY PLAN                                  
  
+---------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Recursive Union
+         ->  Seq Scan on recursive_table_6
+               Filter: (a = '0'::numeric)
+         ->  Hash Join
+               Hash Cond: (cte.i = (recursive_table_6_1.b)::numeric)
+               ->  WorkTable Scan on cte
+               ->  Hash
+                     ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                           ->  Seq Scan on recursive_table_6 
recursive_table_6_1
+ Optimizer: Postgres query optimizer
+(11 rows)
+
+:qry ;
+ i | j 
+---+---
+ 0 | 3
+ 2 | 0
+ 5 | 0
+(3 rows)
+
+-- Test recursive CTE doesnt create a plan with motion on top of worktablescan
+CREATE TABLE t1 (a int, b int) DISTRIBUTED BY (a);
+SET enable_nestloop = off;
+SET enable_hashjoin = off;
+SET enable_mergejoin = on;
+explain (costs off) with recursive rcte as
+   (
+      ( select a, b, 1::integer recursion_level from t1 order by 1 )
+      union all
+      select parent_table.a, parent_table.b, rcte.recursion_level + 1
+      from
+      ( select a, b from t1 order by 1 ) parent_table
+      join rcte on rcte.b = parent_table.a
+   )
+select count(*) from rcte;
+                                   QUERY PLAN                                  
  
+---------------------------------------------------------------------------------
+ Finalize Aggregate
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         ->  Partial Aggregate
+               ->  Recursive Union
+                     ->  Sort
+                           Sort Key: t1.a
+                           ->  Seq Scan on t1
+                     ->  Nested Loop
+                           Join Filter: (t1_1.a = rcte.b)
+                           ->  WorkTable Scan on rcte
+                           ->  Materialize
+                                 ->  Broadcast Motion 3:3  (slice2; segments: 
3)
+                                       ->  Sort
+                                             Sort Key: t1_1.a
+                                             ->  Seq Scan on t1 t1_1
+ Optimizer: Postgres query optimizer
+(16 rows)
+
+RESET enable_nestloop;
+RESET enable_hashjoin;
+RESET enable_mergejoin;
diff --git a/src/test/regress/sql/gp_recursive_cte.sql 
b/src/test/regress/sql/gp_recursive_cte.sql
index 90aeda8814..549a29fc86 100644
--- a/src/test/regress/sql/gp_recursive_cte.sql
+++ b/src/test/regress/sql/gp_recursive_cte.sql
@@ -518,3 +518,48 @@ with recursive cte (n) as
   ) q
 )
 select * from cte;
+
+-- Test recursive CTE when the non-recursive term is a table scan with a
+-- predicate on the distribution key, and the recursive term joins the CTE with
+-- the same table on its non-distribution key
+create table recursive_table_6(a numeric(4), b int);
+insert into recursive_table_6 values (0::numeric, 3);
+insert into recursive_table_6 values (2::numeric, 0);
+insert into recursive_table_6 values (5::numeric, 0);
+analyze recursive_table_6;
+
+SELECT $query$
+WITH RECURSIVE cte (i, j) AS (
+    SELECT a, b FROM recursive_table_6 WHERE a = 0::numeric::numeric
+    UNION ALL
+    SELECT a, b FROM recursive_table_6, cte WHERE cte.i = recursive_table_6.b
+)
+SELECT i, j FROM cte;
+$query$ AS qry \gset
+
+EXPLAIN (COSTS OFF)
+    :qry ;
+
+:qry ;
+
+-- Test recursive CTE doesnt create a plan with motion on top of worktablescan
+CREATE TABLE t1 (a int, b int) DISTRIBUTED BY (a);
+SET enable_nestloop = off;
+SET enable_hashjoin = off;
+SET enable_mergejoin = on;
+
+explain (costs off) with recursive rcte as
+   (
+      ( select a, b, 1::integer recursion_level from t1 order by 1 )
+      union all
+
+      select parent_table.a, parent_table.b, rcte.recursion_level + 1
+      from
+      ( select a, b from t1 order by 1 ) parent_table
+      join rcte on rcte.b = parent_table.a
+   )
+select count(*) from rcte;
+
+RESET enable_nestloop;
+RESET enable_hashjoin;
+RESET enable_mergejoin;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org
For additional commands, e-mail: commits-h...@cloudberry.apache.org

Reply via email to