(cloudberry) branch main updated: Fix NULL locus of Shared Scan.

avamingli Fri, 17 Oct 2025 17:49:18 -0700

This is an automated email from the ASF dual-hosted git repository.

avamingli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git



The following commit(s) were added to refs/heads/main by this push:
     new 0cd5c6067de Fix NULL locus of Shared Scan.
0cd5c6067de is described below

commit 0cd5c6067de85c50119a995a1477bd96aa1c4765
Author: Zhang Mingli <[email protected]>
AuthorDate: Mon Sep 29 17:38:32 2025 +0800

    Fix NULL locus of Shared Scan.
    
    The shared scan needs its locus type and parallel number to be
    explicitly set when `gp_cte_sharing` is used, even without
    parallelism. Failing to do so resulted in an incorrect NULL locus
    appearing in the query plan.
    
    Fix https://github.com/apache/cloudberry/issues/1376
    
    Authored-by: Zhang Mingli [email protected]
---
 src/backend/optimizer/plan/planshare.c      |  3 ++
 src/test/regress/expected/cbdb_parallel.out | 54 ++++++++++++++++++++++++++++-
 src/test/regress/sql/cbdb_parallel.sql      | 15 +++++++-
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/src/backend/optimizer/plan/planshare.c b/src/backend/optimizer/plan/planshare.c
index f60f45f933e..f3b716364ca 100644
--- a/src/backend/optimizer/plan/planshare.c
+++ b/src/backend/optimizer/plan/planshare.c
@@ -52,6 +52,9 @@ make_shareinputscan(PlannerInfo *root, Plan *inputplan)
        sisc->scan.plan.plan_rows = inputplan->plan_rows;
        sisc->scan.plan.plan_width = inputplan->plan_width;
 
+       sisc->scan.plan.locustype = inputplan->locustype;
+       sisc->scan.plan.parallel = 0; /* No parallel ShareInputScan */
+
        return sisc;
 }
 
diff --git a/src/test/regress/expected/cbdb_parallel.out b/src/test/regress/expected/cbdb_parallel.out
index 942705e7471..da3216896ff 100644
--- a/src/test/regress/expected/cbdb_parallel.out
+++ b/src/test/regress/expected/cbdb_parallel.out
@@ -3516,7 +3516,59 @@ WHERE e.salary > (
  David
 (2 rows)
 
-  
+--
+-- Test https://github.com/apache/cloudberry/issues/1376
+--
+create table t1(a int, b int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table t2 (like t1);
+NOTICE:  table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table
+set gp_cte_sharing = on;
+explain(locus, costs off) with x as
+  (select a, count(*) as b from t1 group by a union all
+    select a, count(*) as b from t2 group by a)
+  select count(*) from x a join x b on a.a = b.b;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Finalize Aggregate
+   Locus: Entry
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Locus: Entry
+         ->  Partial Aggregate
+               Locus: Hashed
+               ->  Hash Join
+                     Locus: Hashed
+                     Hash Cond: (b.b = a.a)
+                     ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                           Locus: Hashed
+                           Hash Key: b.b
+                           ->  Subquery Scan on b
+                                 Locus: Strewn
+                                 ->  Shared Scan (share slice:id 2:0)
+                                       Locus: Hashed
+                     ->  Hash
+                           Locus: Hashed
+                           ->  Subquery Scan on a
+                                 Locus: Hashed
+                                 ->  Shared Scan (share slice:id 1:0)
+                                       Locus: Hashed
+                                       ->  Append
+                                             Locus: Hashed
+                                             ->  HashAggregate
+                                                   Locus: Hashed
+                                                   Group Key: t1.a
+                                                   ->  Seq Scan on t1
+                                                         Locus: Hashed
+                                             ->  HashAggregate
+                                                   Locus: Hashed
+                                                   Group Key: t2.a
+                                                   ->  Seq Scan on t2
+                                                         Locus: Hashed
+ Optimizer: Postgres query optimizer
+(35 rows)
+
+reset gp_cte_sharing;
 reset enable_parallel;
 reset min_parallel_table_scan_size;
 -- start_ignore
diff --git a/src/test/regress/sql/cbdb_parallel.sql b/src/test/regress/sql/cbdb_parallel.sql
index 0ee6f72cb2a..f9d01dd8a00 100644
--- a/src/test/regress/sql/cbdb_parallel.sql
+++ b/src/test/regress/sql/cbdb_parallel.sql
@@ -1132,7 +1132,20 @@ WHERE e.salary > (
     SELECT AVG(salary)
     FROM employees
     WHERE department_id = e.department_id);
-  
+
+--
+-- Test https://github.com/apache/cloudberry/issues/1376
+--
+create table t1(a int, b int);
+create table t2 (like t1);
+set gp_cte_sharing = on;
+
+explain(locus, costs off) with x as
+  (select a, count(*) as b from t1 group by a union all
+    select a, count(*) as b from t2 group by a)
+  select count(*) from x a join x b on a.a = b.b;
+
+reset gp_cte_sharing;
 reset enable_parallel;
 reset min_parallel_table_scan_size;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) branch main updated: Fix NULL locus of Shared Scan.

Reply via email to