This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 33b36471d27c703dc02c7a0e1817e7374730043f
Author: Hari krishna <[email protected]>
AuthorDate: Mon Jul 31 15:57:21 2023 +0530

    Correct the child order when creating grouping sets for the rollup (#16096)
    
    * Correct the child order when creating grouping sets for the rollup
    
    Issue:
          Performance drop observed with TPCDS-DS query-167
    RCA:
          When a query involves a ROLLUP, we observed that the order of
    children under the append operator has changed. The first child of the
    append operator has a distribution of "SINGLETON," which results in all
    inner children inheriting the same distribution. As a consequence,
    Gather Motion operations are performed for all the aggregators under the
    Append operator, leading to increased execution time for the append
    operator in TPC-DS query 167.
    
    FIX:
          The PR addresses the issue by fixing the order of children while
    creating grouping sets for the rollup during the query to DXL
    translation.
---
 src/backend/gpopt/translate/CTranslatorUtils.cpp   |   6 +-
 src/test/regress/expected/bfv_olap.out             | 107 +++++++++++
 src/test/regress/expected/bfv_olap_optimizer.out   | 196 +++++++++++++++++++++
 .../regress/expected/groupingsets_optimizer.out    |   4 +-
 src/test/regress/sql/bfv_olap.sql                  |  17 ++
 5 files changed, 326 insertions(+), 4 deletions(-)

diff --git a/src/backend/gpopt/translate/CTranslatorUtils.cpp 
b/src/backend/gpopt/translate/CTranslatorUtils.cpp
index cb5e4bcb53..52c1b172e6 100644
--- a/src/backend/gpopt/translate/CTranslatorUtils.cpp
+++ b/src/backend/gpopt/translate/CTranslatorUtils.cpp
@@ -1163,6 +1163,10 @@ 
CTranslatorUtils::CreateGroupingSetsForRollup(CMemoryPool *mp,
        CBitSetArray *col_attnos_arr = GPOS_NEW(mp) CBitSetArray(mp);
        ListCell *lc = nullptr;
        CBitSet *current_result = GPOS_NEW(mp) CBitSet(mp);
+       // Maintaining the order of grouping sets is essential because the
+       // UnionAll operator matches each child's distribution with the
+       // distribution of the first child
+       col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp));
        ForEach(lc, grouping_set->content)
        {
                GroupingSet *gs_current = (GroupingSet *) lfirst(lc);
@@ -1175,8 +1179,6 @@ CTranslatorUtils::CreateGroupingSetsForRollup(CMemoryPool 
*mp,
                bset->Release();
                col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp, 
*current_result));
        }
-       // add an empty set
-       col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp));
        current_result->Release();
        return col_attnos_arr;
 }
diff --git a/src/test/regress/expected/bfv_olap.out 
b/src/test/regress/expected/bfv_olap.out
index b6a8bed241..eadb52b004 100644
--- a/src/test/regress/expected/bfv_olap.out
+++ b/src/test/regress/expected/bfv_olap.out
@@ -453,9 +453,86 @@ where g in (
   15
 (1 row)
 
+--
+-- Test to check the query plan for a ROLLUP query.
+--
+explain (costs off) select cn, vn, pn, sum(qty*prc) from sale group by 
rollup(cn,vn,pn);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Finalize HashAggregate
+         Group Key: cn, vn, pn, (GROUPINGSET_ID())
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: cn, vn, pn, (GROUPINGSET_ID())
+               ->  Partial MixedAggregate
+                     Hash Key: cn, vn, pn
+                     Hash Key: cn, vn
+                     Hash Key: cn
+                     Group Key: ()
+                     ->  Seq Scan on sale
+ Optimizer: Postgres query optimizer
+(12 rows)
+
+select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn);
+ cn | vn | pn  |   sum   
+----+----+-----+---------
+  1 | 50 | 400 |       0
+  1 | 10 |     |       0
+  2 | 40 |     | 2640000
+  2 | 40 | 100 | 2640000
+  3 | 40 | 200 |       0
+  1 | 30 | 500 |      60
+  3 | 40 |     |       0
+  4 | 40 | 800 |       1
+  4 |    |     |       2
+  1 | 50 |     |       0
+  1 | 20 | 100 |       0
+  3 | 30 | 500 |      60
+  3 | 30 | 600 |      60
+  2 | 50 | 400 |       0
+  2 | 50 |     |       0
+  3 |    |     |     120
+  1 | 30 |     |      60
+  1 | 20 |     |       0
+  2 |    |     | 2640000
+  3 | 30 |     |     120
+  4 | 40 |     |       2
+  4 | 40 | 700 |       1
+  1 | 30 | 300 |       0
+  1 | 10 | 200 |       0
+  1 |    |     |      60
+    |    |     | 2640182
+(26 rows)
+
 --
 -- This caused a crash in ROLLUP planning at one point.
 --
+EXPLAIN (costs off)
+SELECT sale.vn
+FROM sale,vendor
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
+                                   QUERY PLAN                                  
  
+---------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: sale.dt, sale.cn, sale.pn, sale.vn, (GROUPINGSET_ID())
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: sale.dt, sale.cn, sale.pn, sale.vn, (GROUPINGSET_ID())
+               ->  Partial MixedAggregate
+                     Hash Key: sale.dt, sale.cn, sale.pn, sale.vn
+                     Hash Key: sale.dt, sale.cn, sale.pn
+                     Hash Key: sale.dt, sale.cn
+                     Group Key: ()
+                     ->  Hash Join
+                           Hash Cond: (sale.vn = vendor.vn)
+                           ->  Seq Scan on sale
+                           ->  Hash
+                                 ->  Broadcast Motion 3:3  (slice3; segments: 
3)
+                                       ->  Seq Scan on vendor
+ Optimizer: Postgres query optimizer
+(17 rows)
+
 SELECT sale.vn
 FROM sale,vendor
 WHERE sale.vn=vendor.vn
@@ -498,6 +575,36 @@ GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
    
 (34 rows)
 
+EXPLAIN (costs off)
+SELECT DISTINCT sale.vn
+FROM sale,vendor
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
+                                         QUERY PLAN                            
              
+---------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: sale.vn
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: sale.vn
+               ->  HashAggregate
+                     Group Key: sale.dt, sale.cn, sale.pn, sale.vn, 
(GROUPINGSET_ID())
+                     ->  Redistribute Motion 3:3  (slice3; segments: 3)
+                           Hash Key: sale.dt, sale.cn, sale.pn, sale.vn, 
(GROUPINGSET_ID())
+                           ->  Partial MixedAggregate
+                                 Hash Key: sale.dt, sale.cn, sale.pn, sale.vn
+                                 Hash Key: sale.dt, sale.cn, sale.pn
+                                 Hash Key: sale.dt, sale.cn
+                                 Group Key: ()
+                                 ->  Hash Join
+                                       Hash Cond: (sale.vn = vendor.vn)
+                                       ->  Seq Scan on sale
+                                       ->  Hash
+                                             ->  Broadcast Motion 3:3  
(slice4; segments: 3)
+                                                   ->  Seq Scan on vendor
+ Optimizer: Postgres query optimizer
+(21 rows)
+
 SELECT DISTINCT sale.vn
 FROM sale,vendor
 WHERE sale.vn=vendor.vn
diff --git a/src/test/regress/expected/bfv_olap_optimizer.out 
b/src/test/regress/expected/bfv_olap_optimizer.out
index ae41f93494..b76cfccd72 100644
--- a/src/test/regress/expected/bfv_olap_optimizer.out
+++ b/src/test/regress/expected/bfv_olap_optimizer.out
@@ -453,9 +453,141 @@ where g in (
   15
 (1 row)
 
+--
+-- Test to check the query plan for a ROLLUP query.
+--
+explain (costs off) select cn, vn, pn, sum(qty*prc) from sale group by 
rollup(cn,vn,pn);
+                                      QUERY PLAN                               
       
+--------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Sequence
+         ->  Shared Scan (share slice:id 1:0)
+               ->  Seq Scan on sale
+         ->  Append
+               ->  GroupAggregate
+                     Group Key: share0_ref2.cn, share0_ref2.vn, share0_ref2.pn
+                     ->  Sort
+                           Sort Key: share0_ref2.cn, share0_ref2.vn, 
share0_ref2.pn
+                           ->  Shared Scan (share slice:id 1:0)
+               ->  Finalize GroupAggregate
+                     Group Key: share0_ref3.cn, share0_ref3.vn
+                     ->  Sort
+                           Sort Key: share0_ref3.cn, share0_ref3.vn
+                           ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                                 Hash Key: share0_ref3.cn, share0_ref3.vn
+                                 ->  Partial GroupAggregate
+                                       Group Key: share0_ref3.cn, 
share0_ref3.vn
+                                       ->  Sort
+                                             Sort Key: share0_ref3.cn, 
share0_ref3.vn
+                                             ->  Shared Scan (share slice:id 
2:0)
+               ->  Finalize GroupAggregate
+                     Group Key: share0_ref4.cn
+                     ->  Sort
+                           Sort Key: share0_ref4.cn
+                           ->  Redistribute Motion 3:3  (slice3; segments: 3)
+                                 Hash Key: share0_ref4.cn
+                                 ->  Partial GroupAggregate
+                                       Group Key: share0_ref4.cn
+                                       ->  Sort
+                                             Sort Key: share0_ref4.cn
+                                             ->  Shared Scan (share slice:id 
3:0)
+               ->  Result
+                     ->  Redistribute Motion 1:3  (slice4)
+                           ->  Finalize Aggregate
+                                 ->  Gather Motion 3:1  (slice5; segments: 3)
+                                       ->  Partial Aggregate
+                                             ->  Shared Scan (share slice:id 
5:0)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(39 rows)
+
+select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn);
+ cn | vn | pn  |   sum   
+----+----+-----+---------
+  1 | 10 | 200 |       0
+  3 | 30 | 500 |      60
+  2 | 40 |     | 2640000
+  3 | 40 |     |       0
+  2 | 50 | 400 |       0
+  3 | 30 | 600 |      60
+  4 | 40 | 800 |       1
+  3 | 30 |     |     120
+  4 | 40 |     |       2
+  2 |    |     | 2640000
+  3 |    |     |     120
+  4 |    |     |       2
+  1 | 20 | 100 |       0
+  1 | 30 | 300 |       0
+  1 | 30 | 500 |      60
+  1 | 50 | 400 |       0
+  2 | 40 | 100 | 2640000
+  3 | 40 | 200 |       0
+  4 | 40 | 700 |       1
+  1 | 10 |     |       0
+  1 | 20 |     |       0
+  1 | 30 |     |      60
+  1 | 50 |     |       0
+  2 | 50 |     |       0
+  1 |    |     |      60
+    |    |     | 2640182
+(26 rows)
+
 --
 -- This caused a crash in ROLLUP planning at one point.
 --
+EXPLAIN (costs off)
+SELECT sale.vn
+FROM sale,vendor
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
+                                              QUERY PLAN                       
                       
+------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Sequence
+         ->  Shared Scan (share slice:id 1:0)
+               ->  Nested Loop
+                     Join Filter: true
+                     ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                           Hash Key: sale.vn
+                           ->  Seq Scan on sale
+                     ->  Index Scan using vendor_pkey on vendor
+                           Index Cond: (vn = sale.vn)
+         ->  Append
+               ->  GroupAggregate
+                     Group Key: share0_ref2.dt, share0_ref2.cn, 
share0_ref2.pn, share0_ref2.vn
+                     ->  Sort
+                           Sort Key: share0_ref2.dt, share0_ref2.cn, 
share0_ref2.pn, share0_ref2.vn
+                           ->  Shared Scan (share slice:id 1:0)
+               ->  GroupAggregate
+                     Group Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn
+                     ->  Sort
+                           Sort Key: share0_ref3.dt, share0_ref3.cn, 
share0_ref3.pn
+                           ->  Redistribute Motion 3:3  (slice3; segments: 3)
+                                 Hash Key: share0_ref3.dt, share0_ref3.cn, 
share0_ref3.pn
+                                 ->  GroupAggregate
+                                       Group Key: share0_ref3.dt, 
share0_ref3.cn, share0_ref3.pn
+                                       ->  Sort
+                                             Sort Key: share0_ref3.dt, 
share0_ref3.cn, share0_ref3.pn
+                                             ->  Shared Scan (share slice:id 
3:0)
+               ->  GroupAggregate
+                     Group Key: share0_ref4.dt, share0_ref4.cn
+                     ->  Sort
+                           Sort Key: share0_ref4.dt, share0_ref4.cn
+                           ->  Redistribute Motion 3:3  (slice4; segments: 3)
+                                 Hash Key: share0_ref4.dt, share0_ref4.cn
+                                 ->  GroupAggregate
+                                       Group Key: share0_ref4.dt, 
share0_ref4.cn
+                                       ->  Sort
+                                             Sort Key: share0_ref4.dt, 
share0_ref4.cn
+                                             ->  Shared Scan (share slice:id 
4:0)
+               ->  Result
+                     ->  Redistribute Motion 1:3  (slice5)
+                           ->  Aggregate
+                                 ->  Gather Motion 3:1  (slice6; segments: 3)
+                                       ->  Aggregate
+                                             ->  Shared Scan (share slice:id 
6:0)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(45 rows)
+
 SELECT sale.vn
 FROM sale,vendor
 WHERE sale.vn=vendor.vn
@@ -498,6 +630,70 @@ GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
    
 (34 rows)
 
+EXPLAIN (costs off)
+SELECT DISTINCT sale.vn
+FROM sale,vendor
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
+                                                             QUERY PLAN        
                                                     
+------------------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  GroupAggregate
+         Group Key: share0_ref2.vn
+         ->  Sort
+               Sort Key: share0_ref2.vn
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                     Hash Key: share0_ref2.vn
+                     ->  GroupAggregate
+                           Group Key: share0_ref2.vn
+                           ->  Sort
+                                 Sort Key: share0_ref2.vn
+                                 ->  Sequence
+                                       ->  Shared Scan (share slice:id 2:0)
+                                             ->  Nested Loop
+                                                   Join Filter: true
+                                                   ->  Redistribute Motion 3:3 
 (slice3; segments: 3)
+                                                         Hash Key: sale.vn
+                                                         ->  Seq Scan on sale
+                                                   ->  Index Scan using 
vendor_pkey on vendor
+                                                         Index Cond: (vn = 
sale.vn)
+                                       ->  Append
+                                             ->  GroupAggregate
+                                                   Group Key: share0_ref2.dt, 
share0_ref2.cn, share0_ref2.pn, share0_ref2.vn
+                                                   ->  Sort
+                                                         Sort Key: 
share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn
+                                                         ->  Shared Scan 
(share slice:id 2:0)
+                                             ->  GroupAggregate
+                                                   Group Key: share0_ref3.dt, 
share0_ref3.cn, share0_ref3.pn
+                                                   ->  Sort
+                                                         Sort Key: 
share0_ref3.dt, share0_ref3.cn, share0_ref3.pn
+                                                         ->  Redistribute 
Motion 3:3  (slice4; segments: 3)
+                                                               Hash Key: 
share0_ref3.dt, share0_ref3.cn, share0_ref3.pn
+                                                               ->  
GroupAggregate
+                                                                     Group 
Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn
+                                                                     ->  Sort
+                                                                           
Sort Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn
+                                                                           ->  
Shared Scan (share slice:id 4:0)
+                                             ->  GroupAggregate
+                                                   Group Key: share0_ref4.dt, 
share0_ref4.cn
+                                                   ->  Sort
+                                                         Sort Key: 
share0_ref4.dt, share0_ref4.cn
+                                                         ->  Redistribute 
Motion 3:3  (slice5; segments: 3)
+                                                               Hash Key: 
share0_ref4.dt, share0_ref4.cn
+                                                               ->  
GroupAggregate
+                                                                     Group 
Key: share0_ref4.dt, share0_ref4.cn
+                                                                     ->  Sort
+                                                                           
Sort Key: share0_ref4.dt, share0_ref4.cn
+                                                                           ->  
Shared Scan (share slice:id 5:0)
+                                             ->  Result
+                                                   ->  Redistribute Motion 1:3 
 (slice6)
+                                                         ->  Aggregate
+                                                               ->  Gather 
Motion 3:1  (slice7; segments: 3)
+                                                                     ->  
Aggregate
+                                                                           ->  
Shared Scan (share slice:id 7:0)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(55 rows)
+
 SELECT DISTINCT sale.vn
 FROM sale,vendor
 WHERE sale.vn=vendor.vn
diff --git a/src/test/regress/expected/groupingsets_optimizer.out 
b/src/test/regress/expected/groupingsets_optimizer.out
index 403c1fbb63..4493b747b0 100644
--- a/src/test/regress/expected/groupingsets_optimizer.out
+++ b/src/test/regress/expected/groupingsets_optimizer.out
@@ -919,9 +919,9 @@ explain (costs off)
                                           QUERY PLAN                           
               
 
----------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   Merge Key: (NULL::integer)
+   Merge Key: share0_ref2.a
    ->  Sort
-         Sort Key: (NULL::integer)
+         Sort Key: share0_ref2.a
          ->  Sequence
                ->  Shared Scan (share slice:id 1:0)
                      ->  Seq Scan on gstest2
diff --git a/src/test/regress/sql/bfv_olap.sql 
b/src/test/regress/sql/bfv_olap.sql
index 46d9374980..c1a7a7759e 100644
--- a/src/test/regress/sql/bfv_olap.sql
+++ b/src/test/regress/sql/bfv_olap.sql
@@ -333,15 +333,32 @@ where g in (
   select rank() over (order by x) from generate_series(1,5) x
 );
 
+--
+-- Test to check the query plan for a ROLLUP query.
+--
+explain (costs off) select cn, vn, pn, sum(qty*prc) from sale group by 
rollup(cn,vn,pn);
+select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn);
 
 --
 -- This caused a crash in ROLLUP planning at one point.
 --
+EXPLAIN (costs off)
+SELECT sale.vn
+FROM sale,vendor
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
+
 SELECT sale.vn
 FROM sale,vendor
 WHERE sale.vn=vendor.vn
 GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
 
+EXPLAIN (costs off)
+SELECT DISTINCT sale.vn
+FROM sale,vendor
+WHERE sale.vn=vendor.vn
+GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn));
+
 SELECT DISTINCT sale.vn
 FROM sale,vendor
 WHERE sale.vn=vendor.vn


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to