(cloudberry) 25/30: Fix crash of AggNode in executor caused by ORCA plan (#14577)

maxyang Fri, 24 Jan 2025 03:11:26 -0800

This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit 87b3b32999774fd53f264955a89943e34dc8e278
Author: Haotian Chen <[email protected]>
AuthorDate: Fri Dec 9 16:06:10 2022 +0800

    Fix crash of AggNode in executor caused by ORCA plan (#14577)
    
    Use the split type of each Aggref in the output targetlist, instead of
    the Agg node's split type, when building the trans/combine functions
    in the executor.
---
 src/backend/executor/execExpr.c                |  3 +-
 src/backend/executor/nodeAgg.c                 |  2 +-
 src/test/regress/expected/gp_dqa.out           | 10 ++--
 src/test/regress/expected/gp_dqa_optimizer.out | 74 +++++++++++++-------------
 src/test/regress/sql/gp_dqa.sql                |  2 +
 5 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 428b1be04b..03fe797e93 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3576,7 +3576,6 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase 
phase,
        ExprState  *state = makeNode(ExprState);
        PlanState  *parent = &aggstate->ss.ps;
        ExprEvalStep scratch = {0};
-       bool            isCombine = DO_AGGSPLIT_COMBINE(aggstate->aggsplit);
        LastAttnumInfo deform = {0, 0, 0};
 
        state->expr = (Expr *) aggstate;
@@ -3626,6 +3625,8 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase 
phase,
                ListCell   *bail;
                if (!bms_is_member(transno, aggstate->aggs_used))
                        continue;
+
+               bool isCombine = 
DO_AGGSPLIT_COMBINE(pertrans->aggref->aggsplit);
                /*
                 * If filter present, emit. Do so before evaluating the input, 
to
                 * avoid potentially unneeded computations, or even worse, 
unintended
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 75f160856c..790821324a 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -4233,7 +4233,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
         * transfn and transfn_oid fields of pertrans refer to the combine
         * function rather than the transition function.
         */
-       if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
+       if (DO_AGGSPLIT_COMBINE(aggref->aggsplit))
        {
                Expr       *combinefnexpr;
                size_t          numTransArgs;
diff --git a/src/test/regress/expected/gp_dqa.out 
b/src/test/regress/expected/gp_dqa.out
index 0e731124f7..5ed20b2f3e 100644
--- a/src/test/regress/expected/gp_dqa.out
+++ b/src/test/regress/expected/gp_dqa.out
@@ -2409,6 +2409,8 @@ select count(distinct a) from t_issue_659;
 
 reset gp_eager_distinct_dedup;
 reset optimizer_force_three_stage_scalar_dqa;
+reset optimizer_enable_use_distribution_in_dqa;
+drop table t_issue_659;
 -- fix dqa bug when optimizer_force_multistage_agg is on
 set optimizer_force_multistage_agg = on;
 create table multiagg1(a int, b bigint, c int);
@@ -2435,8 +2437,8 @@ explain (verbose, costs off) select count(distinct b), 
sum(c) from multiagg1;
                      Hash Key: b
                      ->  Seq Scan on public.multiagg1
                            Output: b, c
- Settings: enable_groupagg = 'off', enable_hashagg = 'on', 
gp_motion_cost_per_row = '2', optimizer = 'off'
  Optimizer: Postgres query optimizer
+ Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', 
optimizer_force_multistage_agg = 'on'
 (13 rows)
 
 select count(distinct b), sum(c) from multiagg1;
@@ -2446,8 +2448,8 @@ select count(distinct b), sum(c) from multiagg1;
 (1 row)
 
 explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2;
-                                                QUERY PLAN                     
                            
------------------------------------------------------------------------------------------------------------
+                                               QUERY PLAN                      
                         
+--------------------------------------------------------------------------------------------------------
  Finalize Aggregate
    Output: count(DISTINCT b), sum(c)
    ->  Gather Motion 3:1  (slice1; segments: 3)
@@ -2459,8 +2461,8 @@ explain (verbose, costs off) select count(distinct b), 
sum(c) from multiagg2;
                      Hash Key: b
                      ->  Seq Scan on public.multiagg2
                            Output: b, c
- Settings: enable_groupagg = 'off', enable_hashagg = 'on', 
gp_motion_cost_per_row = '2', optimizer = 'off'
  Optimizer: Postgres query optimizer
+ Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', 
optimizer_force_multistage_agg = 'on'
 (13 rows)
 
 select count(distinct b), sum(c) from multiagg2;
diff --git a/src/test/regress/expected/gp_dqa_optimizer.out 
b/src/test/regress/expected/gp_dqa_optimizer.out
index c870359c83..787180b791 100644
--- a/src/test/regress/expected/gp_dqa_optimizer.out
+++ b/src/test/regress/expected/gp_dqa_optimizer.out
@@ -2559,13 +2559,15 @@ select count(distinct a) from t_issue_659;
 
 reset gp_eager_distinct_dedup;
 reset optimizer_force_three_stage_scalar_dqa;
+reset optimizer_enable_use_distribution_in_dqa;
+drop table t_issue_659;
 -- fix dqa bug when optimizer_force_multistage_agg is on
 set optimizer_force_multistage_agg = on;
 create table multiagg1(a int, b bigint, c int);
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Cloudberry Database data distribution key for this table.
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table multiagg2(a int, b bigint, c numeric(8, 4));
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Cloudberry Database data distribution key for this table.
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into multiagg1 values(generate_series(1, 10), generate_series(1, 10), 
generate_series(1, 10));
 INFO:  GPORCA failed to produce a plan, falling back to planner
@@ -2576,56 +2578,56 @@ DETAIL:  Feature not supported: Unexpected target list 
entries in ProjectSet nod
 analyze multiagg1;
 analyze multiagg2;
 explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1;
-INFO:  GPORCA failed to produce a plan, falling back to planner
-DETAIL:  GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate 
stage not implemented not supported in DXL
-                                       QUERY PLAN                              
         
-----------------------------------------------------------------------------------------
- Finalize Aggregate
-   Output: count(DISTINCT b), sum(c)
+                                               QUERY PLAN                      
                         
+--------------------------------------------------------------------------------------------------------
+ Aggregate
+   Output: count(b), sum(c)
    ->  Gather Motion 3:1  (slice1; segments: 3)
-         Output: (PARTIAL count(DISTINCT b)), (PARTIAL sum(c))
-         ->  Partial Aggregate
-               Output: PARTIAL count(DISTINCT b), PARTIAL sum(c)
+         Output: b, (PARTIAL sum(c))
+         ->  Partial HashAggregate
+               Output: b, PARTIAL sum(c)
+               Group Key: multiagg1.b
                ->  Redistribute Motion 3:3  (slice2; segments: 3)
-                     Output: b, c
+                     Output: b, (PARTIAL sum(c))
                      Hash Key: b
-                     ->  Seq Scan on public.multiagg1
-                           Output: b, c
- Settings: enable_groupagg = 'off', enable_hashagg = 'on', 
gp_motion_cost_per_row = '2'
- Optimizer: Postgres query optimizer
-(13 rows)
+                     ->  Streaming Partial HashAggregate
+                           Output: b, PARTIAL sum(c)
+                           Group Key: multiagg1.b
+                           ->  Seq Scan on public.multiagg1
+                                 Output: b, c
+ Optimizer: Pivotal Optimizer (GPORCA)
+ Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', 
optimizer_force_multistage_agg = 'on'
+(17 rows)
 
 select count(distinct b), sum(c) from multiagg1;
-INFO:  GPORCA failed to produce a plan, falling back to planner
-DETAIL:  GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate 
stage not implemented not supported in DXL
  count | sum 
 -------+-----
     10 |  55
 (1 row)
 
 explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2;
-INFO:  GPORCA failed to produce a plan, falling back to planner
-DETAIL:  GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate 
stage not implemented not supported in DXL
-                                       QUERY PLAN                              
         
-----------------------------------------------------------------------------------------
- Finalize Aggregate
-   Output: count(DISTINCT b), sum(c)
+                                               QUERY PLAN                      
                         
+--------------------------------------------------------------------------------------------------------
+ Aggregate
+   Output: count(b), sum(c)
    ->  Gather Motion 3:1  (slice1; segments: 3)
-         Output: (PARTIAL count(DISTINCT b)), (PARTIAL sum(c))
-         ->  Partial Aggregate
-               Output: PARTIAL count(DISTINCT b), PARTIAL sum(c)
+         Output: b, (PARTIAL sum(c))
+         ->  Partial HashAggregate
+               Output: b, PARTIAL sum(c)
+               Group Key: multiagg2.b
                ->  Redistribute Motion 3:3  (slice2; segments: 3)
-                     Output: b, c
+                     Output: b, (PARTIAL sum(c))
                      Hash Key: b
-                     ->  Seq Scan on public.multiagg2
-                           Output: b, c
- Settings: enable_groupagg = 'off', enable_hashagg = 'on', 
gp_motion_cost_per_row = '2'
- Optimizer: Postgres query optimizer
-(13 rows)
+                     ->  Streaming Partial HashAggregate
+                           Output: b, PARTIAL sum(c)
+                           Group Key: multiagg2.b
+                           ->  Seq Scan on public.multiagg2
+                                 Output: b, c
+ Optimizer: Pivotal Optimizer (GPORCA)
+ Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', 
optimizer_force_multistage_agg = 'on'
+(17 rows)
 
 select count(distinct b), sum(c) from multiagg2;
-INFO:  GPORCA failed to produce a plan, falling back to planner
-DETAIL:  GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate 
stage not implemented not supported in DXL
  count |    sum    
 -------+-----------
     10 | 5555.5000
diff --git a/src/test/regress/sql/gp_dqa.sql b/src/test/regress/sql/gp_dqa.sql
index b5180e49e3..8b4cda3a8c 100644
--- a/src/test/regress/sql/gp_dqa.sql
+++ b/src/test/regress/sql/gp_dqa.sql
@@ -431,6 +431,8 @@ select count(distinct a) from t_issue_659;
 select count(distinct a) from t_issue_659;
 reset gp_eager_distinct_dedup;
 reset optimizer_force_three_stage_scalar_dqa;
+reset optimizer_enable_use_distribution_in_dqa;
+drop table t_issue_659;
 
 
 -- fix dqa bug when optimizer_force_multistage_agg is on


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 25/30: Fix crash of AggNode in executor caused by ORCA plan (#14577)

Reply via email to