(cloudberry) 04/43: fix bug of RelabelType in group by clause (#15958)

reshke Wed, 12 Feb 2025 22:05:38 -0800

This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit ff9f7609d0fb9d4691e4445ac29a77d0f8e63488
Author: chaotian <[email protected]>
AuthorDate: Fri Jul 21 17:12:49 2023 +0800

    fix bug of RelabelType in group by clause (#15958)
    
    Remove the code that pulls up Vars from RelabelType in set_plan_refs; upstream
    has fixed this issue and removed that code. In fact, we don't need to extract
    Vars from RelabelType; we should just let the upper RelabelType refer to the
    lower RelabelType directly.
---
 src/backend/optimizer/plan/setrefs.c               | 11 ------
 src/test/regress/expected/bfv_aggregate.out        | 27 ++++++++++++++
 .../regress/expected/bfv_aggregate_optimizer.out   | 42 ++++++++++++++++++++++
 src/test/regress/sql/bfv_aggregate.sql             |  8 +++++
 4 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 66e52469a0..4144591f53 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -2986,17 +2986,6 @@ build_tlist_index(List *tlist)
 
                Assert(expr);
 
-               /*
-                * Allow a Var in parent node's expr to find matching Var in tlist
-                * ignoring any RelabelType nodes atop the tlist Var.  Also set
-                * has_non_vars so tlist expr can be matched as a whole.
-                */
-               while (IsA(expr, RelabelType))
-               {
-                       expr = ((RelabelType *)expr)->arg;
-                       itlist->has_non_vars = true;
-               }
-
                if (expr && IsA(expr, Var))
                {
                        Var                *var = (Var *) expr;
diff --git a/src/test/regress/expected/bfv_aggregate.out b/src/test/regress/expected/bfv_aggregate.out
index 14413d2326..80b3511ef6 100644
--- a/src/test/regress/expected/bfv_aggregate.out
+++ b/src/test/regress/expected/bfv_aggregate.out
@@ -1811,6 +1811,33 @@ select 1, median(col1) from group_by_const group by 1;
         1 |  500.5
 (1 row)
 
+-- Test GROUP BY with a RelabelType
+create table tx (c1 text);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tx values('hello');
+EXPLAIN (COSTS OFF, VERBOSE ON)
+SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: (min(c1)), ((c1)::character varying)
+   ->  HashAggregate
+         Output: min(c1), ((c1)::character varying)
+         Group Key: (tx.c1)::character varying
+         ->  Seq Scan on bfv_aggregate.tx
+               Output: (c1)::character varying, c1
+ Optimizer: Postgres query optimizer
+ Settings: optimizer = 'off'
+(9 rows)
+
+SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR;
+  min  
+-------
+ hello
+(1 row)
+
+drop table tx;
 -- ORCA should pick singlestage-agg plan when multistage-agg guc is true
 -- and distribution type is universal/replicated
 set optimizer_force_multistage_agg to on;
diff --git a/src/test/regress/expected/bfv_aggregate_optimizer.out b/src/test/regress/expected/bfv_aggregate_optimizer.out
index cd030ed2eb..d079f56777 100644
--- a/src/test/regress/expected/bfv_aggregate_optimizer.out
+++ b/src/test/regress/expected/bfv_aggregate_optimizer.out
@@ -1823,6 +1823,48 @@ select 1, median(col1) from group_by_const group by 1;
         1 |  500.5
 (1 row)
 
+-- Test GROUP BY with a RelabelType
+create table tx (c1 text);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tx values('hello');
+EXPLAIN (COSTS OFF, VERBOSE ON)
+SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR;
+                                        QUERY PLAN                             
           
+------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: (min(c1))
+   ->  Finalize GroupAggregate
+         Output: min(c1)
+         Group Key: ((tx.c1)::character varying)
+         ->  Sort
+               Output: ((c1)::character varying), (PARTIAL min(c1))
+               Sort Key: ((tx.c1)::character varying)
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                     Output: ((c1)::character varying), (PARTIAL min(c1))
+                     Hash Key: ((c1)::character varying)
+                     ->  Partial GroupAggregate
+                           Output: ((c1)::character varying), PARTIAL min(c1)
+                           Group Key: ((tx.c1)::character varying)
+                           ->  Sort
+                                 Output: ((c1)::character varying), c1
+                                 Sort Key: ((tx.c1)::character varying)
+                                 ->  Result
+                                       Output: c1, c1
+                                       ->  Redistribute Motion 3:3  (slice3; segments: 3)
+                                             Output: c1
+                                             ->  Seq Scan on bfv_aggregate.tx
+                                                   Output: c1
+ Optimizer: Pivotal Optimizer (GPORCA)
+(24 rows)
+
+SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR;
+  min  
+-------
+ hello
+(1 row)
+
+drop table tx;
 -- ORCA should pick singlestage-agg plan when multistage-agg guc is true
 -- and distribution type is universal/replicated
 set optimizer_force_multistage_agg to on;
diff --git a/src/test/regress/sql/bfv_aggregate.sql b/src/test/regress/sql/bfv_aggregate.sql
index 20fe31ee53..e3c34494f8 100644
--- a/src/test/regress/sql/bfv_aggregate.sql
+++ b/src/test/regress/sql/bfv_aggregate.sql
@@ -1469,6 +1469,14 @@ explain (costs off)
 select 1, median(col1) from group_by_const group by 1;
 select 1, median(col1) from group_by_const group by 1;
 
+-- Test GROUP BY with a RelabelType
+create table tx (c1 text);
+insert into tx values('hello');
+EXPLAIN (COSTS OFF, VERBOSE ON)
+SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR;
+SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR;
+drop table tx;
+
 -- ORCA should pick singlestage-agg plan when multistage-agg guc is true
 -- and distribution type is universal/replicated
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 04/43: fix bug of RelabelType in group by clause (#15958)

Reply via email to