This is an automated email from the ASF dual-hosted git repository. avamingli pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 49ae63ad03324c6f055ab24410e02098fbc543e0 Author: Chris Hajas <[email protected]> AuthorDate: Tue Dec 13 16:25:53 2022 -0800 Fix gp_dqa test to explicitly analyze tables (#14643) Commit 0f4af19b71f2f31acd28c39d0d0a865cbf1d3bab added this test, but the `analyze` statement exposed an unrelated issue and caused CI to fail. We should explicitly analyze tables to reduce runtime in tests anyway. --- src/test/regress/expected/gp_dqa.out | 64 +++++++++++++++++++++- src/test/regress/expected/gp_dqa_optimizer.out | 74 ++++++++++++++++++++++++++ src/test/regress/sql/gp_dqa.sql | 18 +++++++ 3 files changed, 155 insertions(+), 1 deletion(-) diff --git a/src/test/regress/expected/gp_dqa.out b/src/test/regress/expected/gp_dqa.out index 9090a1547a..30505978ab 100644 --- a/src/test/regress/expected/gp_dqa.out +++ b/src/test/regress/expected/gp_dqa.out @@ -2367,7 +2367,7 @@ select count(distinct a) from t_issue_659; -> Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate -> Seq Scan on t_issue_659 - Optimizer: Postgres query optimizer + Optimizer: Pivotal Optimizer (GPORCA) (5 rows) select count(distinct a) from t_issue_659; @@ -2402,6 +2402,68 @@ select count(distinct a) from t_issue_659; reset gp_eager_distinct_dedup; reset optimizer_force_three_stage_scalar_dqa; +-- fix dqa bug when optimizer_force_multistage_agg is on +set optimizer_force_multistage_agg = on; +create table multiagg1(a int, b bigint, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table multiagg2(a int, b bigint, c numeric(8, 4)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into multiagg1 values(generate_series(1, 10), generate_series(1, 10), generate_series(1, 10)); +insert into multiagg2 values(generate_series(1, 10), generate_series(1, 10), 555.55); +analyze multiagg1; +analyze multiagg2; +explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(DISTINCT b), sum(c) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(DISTINCT b)), (PARTIAL sum(c)) + -> Partial Aggregate + Output: PARTIAL count(DISTINCT b), PARTIAL sum(c) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c + Hash Key: b + -> Seq Scan on public.multiagg1 + Output: b, c + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' + Optimizer: Postgres query optimizer +(13 rows) + +select count(distinct b), sum(c) from multiagg1; + count | sum +-------+----- + 10 | 55 +(1 row) + +explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(DISTINCT b), sum(c) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(DISTINCT b)), (PARTIAL sum(c)) + -> Partial Aggregate + Output: PARTIAL count(DISTINCT b), PARTIAL sum(c) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c + Hash Key: b + -> Seq Scan on public.multiagg2 + Output: b, c + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' + Optimizer: Postgres query optimizer +(13 rows) + +select count(distinct b), sum(c) from multiagg2; + count | sum +-------+----------- + 10 | 5555.5000 +(1 row) + +drop table multiagg1; +drop table multiagg2; reset optimizer_force_multistage_agg; reset optimizer_enable_use_distribution_in_dqa; drop table t_issue_659; diff --git a/src/test/regress/expected/gp_dqa_optimizer.out b/src/test/regress/expected/gp_dqa_optimizer.out index 3f1c7f540e..a99bf5a3ee 100644 --- a/src/test/regress/expected/gp_dqa_optimizer.out +++ b/src/test/regress/expected/gp_dqa_optimizer.out @@ -2550,6 +2550,80 @@ select count(distinct a) from t_issue_659; reset gp_eager_distinct_dedup; reset optimizer_force_three_stage_scalar_dqa; +-- fix dqa bug when optimizer_force_multistage_agg is on +set optimizer_force_multistage_agg = on; +create table multiagg1(a int, b bigint, c int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table multiagg2(a int, b bigint, c numeric(8, 4)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into multiagg1 values(generate_series(1, 10), generate_series(1, 10), generate_series(1, 10)); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Feature not supported: Unexpected target list entries in ProjectSet node +insert into multiagg2 values(generate_series(1, 10), generate_series(1, 10), 555.55); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Feature not supported: Unexpected target list entries in ProjectSet node +analyze multiagg1; +analyze multiagg2; +explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate stage not implemented not supported in DXL + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(DISTINCT b), sum(c) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(DISTINCT b)), (PARTIAL sum(c)) + -> Partial Aggregate + Output: PARTIAL count(DISTINCT b), PARTIAL sum(c) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c + Hash Key: b + -> Seq Scan on public.multiagg1 + Output: b, c + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2' + Optimizer: Postgres query optimizer +(13 rows) + +select count(distinct b), sum(c) from multiagg1; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate stage not implemented not supported in DXL + count | sum +-------+----- + 10 | 55 +(1 row) + +explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate stage not implemented not supported in DXL + QUERY PLAN +---------------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(DISTINCT b), sum(c) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(DISTINCT b)), (PARTIAL sum(c)) + -> Partial Aggregate + Output: PARTIAL count(DISTINCT b), PARTIAL sum(c) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: b, c + Hash Key: b + -> Seq Scan on public.multiagg2 + Output: b, c + Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2' + Optimizer: Postgres query optimizer +(13 rows) + +select count(distinct b), sum(c) from multiagg2; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate stage not implemented not supported in DXL + count | sum +-------+----------- + 10 | 5555.5000 +(1 row) + +drop table multiagg1; +drop table multiagg2; reset optimizer_force_multistage_agg; reset optimizer_enable_use_distribution_in_dqa; drop table t_issue_659; diff --git a/src/test/regress/sql/gp_dqa.sql b/src/test/regress/sql/gp_dqa.sql index 65f52cd743..9bbdf48c19 100644 --- a/src/test/regress/sql/gp_dqa.sql +++ b/src/test/regress/sql/gp_dqa.sql @@ -428,6 +428,24 @@ select count(distinct a) from t_issue_659; select count(distinct a) from t_issue_659; reset gp_eager_distinct_dedup; reset optimizer_force_three_stage_scalar_dqa; + + +-- fix dqa bug when optimizer_force_multistage_agg is on +set optimizer_force_multistage_agg = on; +create table multiagg1(a int, b bigint, c int); +create table multiagg2(a int, b bigint, c numeric(8, 4)); +insert into multiagg1 values(generate_series(1, 10), generate_series(1, 10), generate_series(1, 10)); +insert into multiagg2 values(generate_series(1, 10), generate_series(1, 10), 555.55); +analyze multiagg1; +analyze multiagg2; + +explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1; +select count(distinct b), sum(c) from multiagg1; + +explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; +select count(distinct b), sum(c) from multiagg2; +drop table multiagg1; +drop table multiagg2; reset optimizer_force_multistage_agg; reset optimizer_enable_use_distribution_in_dqa; drop table t_issue_659; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
