This is an automated email from the ASF dual-hosted git repository. reshke pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit ff9f7609d0fb9d4691e4445ac29a77d0f8e63488 Author: chaotian <[email protected]> AuthorDate: Fri Jul 21 17:12:49 2023 +0800 fix bug of RelabelType in group by clause (#15958) Remove the code in set_plan_refs that pulled Vars up out of RelabelType nodes; upstream has already fixed this issue and removed that code. In fact, we don't need to extract Vars from a RelabelType: we should just let the upper RelabelType refer to the lower RelabelType directly. --- src/backend/optimizer/plan/setrefs.c | 11 ------ src/test/regress/expected/bfv_aggregate.out | 27 ++++++++++++++ .../regress/expected/bfv_aggregate_optimizer.out | 42 ++++++++++++++++++++++ src/test/regress/sql/bfv_aggregate.sql | 8 +++++ 4 files changed, 77 insertions(+), 11 deletions(-) diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 66e52469a0..4144591f53 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -2986,17 +2986,6 @@ build_tlist_index(List *tlist) Assert(expr); - /* - * Allow a Var in parent node's expr to find matching Var in tlist - * ignoring any RelabelType nodes atop the tlist Var. Also set - * has_non_vars so tlist expr can be matched as a whole. - */ - while (IsA(expr, RelabelType)) - { - expr = ((RelabelType *)expr)->arg; - itlist->has_non_vars = true; - } - if (expr && IsA(expr, Var)) { Var *var = (Var *) expr; diff --git a/src/test/regress/expected/bfv_aggregate.out b/src/test/regress/expected/bfv_aggregate.out index 14413d2326..80b3511ef6 100644 --- a/src/test/regress/expected/bfv_aggregate.out +++ b/src/test/regress/expected/bfv_aggregate.out @@ -1811,6 +1811,33 @@ select 1, median(col1) from group_by_const group by 1; 1 | 500.5 (1 row) +-- Test GROUP BY with a RelabelType +create table tx (c1 text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into tx values('hello'); +EXPLAIN (COSTS OFF, VERBOSE ON) +SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; + QUERY PLAN +---------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (min(c1)), ((c1)::character varying) + -> HashAggregate + Output: min(c1), ((c1)::character varying) + Group Key: (tx.c1)::character varying + -> Seq Scan on bfv_aggregate.tx + Output: (c1)::character varying, c1 + Optimizer: Postgres query optimizer + Settings: optimizer = 'off' +(9 rows) + +SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; + min +------- + hello +(1 row) + +drop table tx; -- ORCA should pick singlestage-agg plan when multistage-agg guc is true -- and distribution type is universal/replicated set optimizer_force_multistage_agg to on; diff --git a/src/test/regress/expected/bfv_aggregate_optimizer.out b/src/test/regress/expected/bfv_aggregate_optimizer.out index cd030ed2eb..d079f56777 100644 --- a/src/test/regress/expected/bfv_aggregate_optimizer.out +++ b/src/test/regress/expected/bfv_aggregate_optimizer.out @@ -1823,6 +1823,48 @@ select 1, median(col1) from group_by_const group by 1; 1 | 500.5 (1 row) +-- Test GROUP BY with a RelabelType +create table tx (c1 text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+insert into tx values('hello'); +EXPLAIN (COSTS OFF, VERBOSE ON) +SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; + QUERY PLAN +------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Output: (min(c1)) + -> Finalize GroupAggregate + Output: min(c1) + Group Key: ((tx.c1)::character varying) + -> Sort + Output: ((c1)::character varying), (PARTIAL min(c1)) + Sort Key: ((tx.c1)::character varying) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: ((c1)::character varying), (PARTIAL min(c1)) + Hash Key: ((c1)::character varying) + -> Partial GroupAggregate + Output: ((c1)::character varying), PARTIAL min(c1) + Group Key: ((tx.c1)::character varying) + -> Sort + Output: ((c1)::character varying), c1 + Sort Key: ((tx.c1)::character varying) + -> Result + Output: c1, c1 + -> Redistribute Motion 3:3 (slice3; segments: 3) + Output: c1 + -> Seq Scan on bfv_aggregate.tx + Output: c1 + Optimizer: Pivotal Optimizer (GPORCA) +(24 rows) + +SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; + min +------- + hello +(1 row) + +drop table tx; -- ORCA should pick singlestage-agg plan when multistage-agg guc is true -- and distribution type is universal/replicated set optimizer_force_multistage_agg to on; diff --git a/src/test/regress/sql/bfv_aggregate.sql b/src/test/regress/sql/bfv_aggregate.sql index 20fe31ee53..e3c34494f8 100644 --- a/src/test/regress/sql/bfv_aggregate.sql +++ b/src/test/regress/sql/bfv_aggregate.sql @@ -1469,6 +1469,14 @@ explain (costs off) select 1, median(col1) from group_by_const group by 1; select 1, median(col1) from group_by_const group by 1; +-- Test GROUP BY with a RelabelType +create table tx (c1 text); +insert into tx values('hello'); +EXPLAIN (COSTS OFF, VERBOSE ON) +SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; +SELECT MIN(tx.c1) FROM tx GROUP BY (tx.c1)::VARCHAR; +drop table tx; + -- ORCA should pick singlestage-agg plan when multistage-agg guc is 
true -- and distribution type is universal/replicated --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
