This is an automated email from the ASF dual-hosted git repository. reshke pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 33b36471d27c703dc02c7a0e1817e7374730043f Author: Hari krishna <[email protected]> AuthorDate: Mon Jul 31 15:57:21 2023 +0530 Correct the child order when creating grouping sets for the rollup (#16096) * Correct the child order when creating grouping sets for the rollup Issue: Performance drop observed with TPCDS-DS query-167 RCA: When a query involves a ROLLUP, we observed that the order of children under the append operator has changed. The first child of the append operator has a distribution of "SINGLETON," which results in all inner children inheriting the same distribution. As a consequence, Gather Motion operations are performed for all the aggregators under the Append operator, leading to increased execution time for the append operator in TPC-DS query 167. FIX: The PR addresses the issue by fixing the order of children while creating grouping sets for the rollup during the query to DXL translation. --- src/backend/gpopt/translate/CTranslatorUtils.cpp | 6 +- src/test/regress/expected/bfv_olap.out | 107 +++++++++++ src/test/regress/expected/bfv_olap_optimizer.out | 196 +++++++++++++++++++++ .../regress/expected/groupingsets_optimizer.out | 4 +- src/test/regress/sql/bfv_olap.sql | 17 ++ 5 files changed, 326 insertions(+), 4 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorUtils.cpp b/src/backend/gpopt/translate/CTranslatorUtils.cpp index cb5e4bcb53..52c1b172e6 100644 --- a/src/backend/gpopt/translate/CTranslatorUtils.cpp +++ b/src/backend/gpopt/translate/CTranslatorUtils.cpp @@ -1163,6 +1163,10 @@ CTranslatorUtils::CreateGroupingSetsForRollup(CMemoryPool *mp, CBitSetArray *col_attnos_arr = GPOS_NEW(mp) CBitSetArray(mp); ListCell *lc = nullptr; CBitSet *current_result = GPOS_NEW(mp) CBitSet(mp); + // Maintaining the order of grouping sets is essential because the + // UnionAll operator matches each child's distribution with the + // distribution of the first child + col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp)); ForEach(lc, grouping_set->content) { GroupingSet *gs_current = (GroupingSet *) lfirst(lc); @@ -1175,8 +1179,6 @@ CTranslatorUtils::CreateGroupingSetsForRollup(CMemoryPool *mp, bset->Release(); col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp, *current_result)); } - // add an empty set - col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp)); current_result->Release(); return col_attnos_arr; } diff --git a/src/test/regress/expected/bfv_olap.out b/src/test/regress/expected/bfv_olap.out index b6a8bed241..eadb52b004 100644 --- a/src/test/regress/expected/bfv_olap.out +++ b/src/test/regress/expected/bfv_olap.out @@ -453,9 +453,86 @@ where g in ( 15 (1 row) +-- +-- Test to check the query plan for a ROLLUP query. +-- +explain (costs off) select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn); + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Finalize HashAggregate + Group Key: cn, vn, pn, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: cn, vn, pn, (GROUPINGSET_ID()) + -> Partial MixedAggregate + Hash Key: cn, vn, pn + Hash Key: cn, vn + Hash Key: cn + Group Key: () + -> Seq Scan on sale + Optimizer: Postgres query optimizer +(12 rows) + +select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn); + cn | vn | pn | sum +----+----+-----+--------- + 1 | 50 | 400 | 0 + 1 | 10 | | 0 + 2 | 40 | | 2640000 + 2 | 40 | 100 | 2640000 + 3 | 40 | 200 | 0 + 1 | 30 | 500 | 60 + 3 | 40 | | 0 + 4 | 40 | 800 | 1 + 4 | | | 2 + 1 | 50 | | 0 + 1 | 20 | 100 | 0 + 3 | 30 | 500 | 60 + 3 | 30 | 600 | 60 + 2 | 50 | 400 | 0 + 2 | 50 | | 0 + 3 | | | 120 + 1 | 30 | | 60 + 1 | 20 | | 0 + 2 | | | 2640000 + 3 | 30 | | 120 + 4 | 40 | | 2 + 4 | 40 | 700 | 1 + 1 | 30 | 300 | 0 + 1 | 10 | 200 | 0 + 1 | | | 60 + | | | 2640182 +(26 rows) + -- -- This caused a crash in ROLLUP planning at one point. -- +EXPLAIN (costs off) +SELECT sale.vn +FROM sale,vendor +WHERE sale.vn=vendor.vn +GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> HashAggregate + Group Key: sale.dt, sale.cn, sale.pn, sale.vn, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: sale.dt, sale.cn, sale.pn, sale.vn, (GROUPINGSET_ID()) + -> Partial MixedAggregate + Hash Key: sale.dt, sale.cn, sale.pn, sale.vn + Hash Key: sale.dt, sale.cn, sale.pn + Hash Key: sale.dt, sale.cn + Group Key: () + -> Hash Join + Hash Cond: (sale.vn = vendor.vn) + -> Seq Scan on sale + -> Hash + -> Broadcast Motion 3:3 (slice3; segments: 3) + -> Seq Scan on vendor + Optimizer: Postgres query optimizer +(17 rows) + SELECT sale.vn FROM sale,vendor WHERE sale.vn=vendor.vn @@ -498,6 +575,36 @@ GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); (34 rows) +EXPLAIN (costs off) +SELECT DISTINCT sale.vn +FROM sale,vendor +WHERE sale.vn=vendor.vn +GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); + QUERY PLAN +--------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> HashAggregate + Group Key: sale.vn + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: sale.vn + -> HashAggregate + Group Key: sale.dt, sale.cn, sale.pn, sale.vn, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: sale.dt, sale.cn, sale.pn, sale.vn, (GROUPINGSET_ID()) + -> Partial MixedAggregate + Hash Key: sale.dt, sale.cn, sale.pn, sale.vn + Hash Key: sale.dt, sale.cn, sale.pn + Hash Key: sale.dt, sale.cn + Group Key: () + -> Hash Join + Hash Cond: (sale.vn = vendor.vn) + -> Seq Scan on sale + -> Hash + -> Broadcast Motion 3:3 (slice4; segments: 3) + -> Seq Scan on vendor + Optimizer: Postgres query optimizer +(21 rows) + SELECT DISTINCT sale.vn FROM sale,vendor WHERE sale.vn=vendor.vn diff --git a/src/test/regress/expected/bfv_olap_optimizer.out b/src/test/regress/expected/bfv_olap_optimizer.out index ae41f93494..b76cfccd72 100644 --- a/src/test/regress/expected/bfv_olap_optimizer.out +++ b/src/test/regress/expected/bfv_olap_optimizer.out @@ -453,9 +453,141 @@ where g in ( 15 (1 row) +-- +-- Test to check the query plan for a ROLLUP query. +-- +explain (costs off) select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn); + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Sequence + -> Shared Scan (share slice:id 1:0) + -> Seq Scan on sale + -> Append + -> GroupAggregate + Group Key: share0_ref2.cn, share0_ref2.vn, share0_ref2.pn + -> Sort + Sort Key: share0_ref2.cn, share0_ref2.vn, share0_ref2.pn + -> Shared Scan (share slice:id 1:0) + -> Finalize GroupAggregate + Group Key: share0_ref3.cn, share0_ref3.vn + -> Sort + Sort Key: share0_ref3.cn, share0_ref3.vn + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: share0_ref3.cn, share0_ref3.vn + -> Partial GroupAggregate + Group Key: share0_ref3.cn, share0_ref3.vn + -> Sort + Sort Key: share0_ref3.cn, share0_ref3.vn + -> Shared Scan (share slice:id 2:0) + -> Finalize GroupAggregate + Group Key: share0_ref4.cn + -> Sort + Sort Key: share0_ref4.cn + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: share0_ref4.cn + -> Partial GroupAggregate + Group Key: share0_ref4.cn + -> Sort + Sort Key: share0_ref4.cn + -> Shared Scan (share slice:id 3:0) + -> Result + -> Redistribute Motion 1:3 (slice4) + -> Finalize Aggregate + -> Gather Motion 3:1 (slice5; segments: 3) + -> Partial Aggregate + -> Shared Scan (share slice:id 5:0) + Optimizer: Pivotal Optimizer (GPORCA) +(39 rows) + +select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn); + cn | vn | pn | sum +----+----+-----+--------- + 1 | 10 | 200 | 0 + 3 | 30 | 500 | 60 + 2 | 40 | | 2640000 + 3 | 40 | | 0 + 2 | 50 | 400 | 0 + 3 | 30 | 600 | 60 + 4 | 40 | 800 | 1 + 3 | 30 | | 120 + 4 | 40 | | 2 + 2 | | | 2640000 + 3 | | | 120 + 4 | | | 2 + 1 | 20 | 100 | 0 + 1 | 30 | 300 | 0 + 1 | 30 | 500 | 60 + 1 | 50 | 400 | 0 + 2 | 40 | 100 | 2640000 + 3 | 40 | 200 | 0 + 4 | 40 | 700 | 1 + 1 | 10 | | 0 + 1 | 20 | | 0 + 1 | 30 | | 60 + 1 | 50 | | 0 + 2 | 50 | | 0 + 1 | | | 60 + | | | 2640182 +(26 rows) + -- -- This caused a crash in ROLLUP planning at one point. -- +EXPLAIN (costs off) +SELECT sale.vn +FROM sale,vendor +WHERE sale.vn=vendor.vn +GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Sequence + -> Shared Scan (share slice:id 1:0) + -> Nested Loop + Join Filter: true + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: sale.vn + -> Seq Scan on sale + -> Index Scan using vendor_pkey on vendor + Index Cond: (vn = sale.vn) + -> Append + -> GroupAggregate + Group Key: share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn + -> Sort + Sort Key: share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn + -> Shared Scan (share slice:id 1:0) + -> GroupAggregate + Group Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Sort + Sort Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> GroupAggregate + Group Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Sort + Sort Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Shared Scan (share slice:id 3:0) + -> GroupAggregate + Group Key: share0_ref4.dt, share0_ref4.cn + -> Sort + Sort Key: share0_ref4.dt, share0_ref4.cn + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: share0_ref4.dt, share0_ref4.cn + -> GroupAggregate + Group Key: share0_ref4.dt, share0_ref4.cn + -> Sort + Sort Key: share0_ref4.dt, share0_ref4.cn + -> Shared Scan (share slice:id 4:0) + -> Result + -> Redistribute Motion 1:3 (slice5) + -> Aggregate + -> Gather Motion 3:1 (slice6; segments: 3) + -> Aggregate + -> Shared Scan (share slice:id 6:0) + Optimizer: Pivotal Optimizer (GPORCA) +(45 rows) + SELECT sale.vn FROM sale,vendor WHERE sale.vn=vendor.vn @@ -498,6 +630,70 @@ GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); (34 rows) +EXPLAIN (costs off) +SELECT DISTINCT sale.vn +FROM sale,vendor +WHERE sale.vn=vendor.vn +GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> GroupAggregate + Group Key: share0_ref2.vn + -> Sort + Sort Key: share0_ref2.vn + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: share0_ref2.vn + -> GroupAggregate + Group Key: share0_ref2.vn + -> Sort + Sort Key: share0_ref2.vn + -> Sequence + -> Shared Scan (share slice:id 2:0) + -> Nested Loop + Join Filter: true + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: sale.vn + -> Seq Scan on sale + -> Index Scan using vendor_pkey on vendor + Index Cond: (vn = sale.vn) + -> Append + -> GroupAggregate + Group Key: share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn + -> Sort + Sort Key: share0_ref2.dt, share0_ref2.cn, share0_ref2.pn, share0_ref2.vn + -> Shared Scan (share slice:id 2:0) + -> GroupAggregate + Group Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Sort + Sort Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Redistribute Motion 3:3 (slice4; segments: 3) + Hash Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> GroupAggregate + Group Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Sort + Sort Key: share0_ref3.dt, share0_ref3.cn, share0_ref3.pn + -> Shared Scan (share slice:id 4:0) + -> GroupAggregate + Group Key: share0_ref4.dt, share0_ref4.cn + -> Sort + Sort Key: share0_ref4.dt, share0_ref4.cn + -> Redistribute Motion 3:3 (slice5; segments: 3) + Hash Key: share0_ref4.dt, share0_ref4.cn + -> GroupAggregate + Group Key: share0_ref4.dt, share0_ref4.cn + -> Sort + Sort Key: share0_ref4.dt, share0_ref4.cn + -> Shared Scan (share slice:id 5:0) + -> Result + -> Redistribute Motion 1:3 (slice6) + -> Aggregate + -> Gather Motion 3:1 (slice7; segments: 3) + -> Aggregate + -> Shared Scan (share slice:id 7:0) + Optimizer: Pivotal Optimizer (GPORCA) +(55 rows) + SELECT DISTINCT sale.vn FROM sale,vendor WHERE sale.vn=vendor.vn diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out index 403c1fbb63..4493b747b0 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -919,9 +919,9 @@ explain (costs off) QUERY PLAN ---------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - Merge Key: (NULL::integer) + Merge Key: share0_ref2.a -> Sort - Sort Key: (NULL::integer) + Sort Key: share0_ref2.a -> Sequence -> Shared Scan (share slice:id 1:0) -> Seq Scan on gstest2 diff --git a/src/test/regress/sql/bfv_olap.sql b/src/test/regress/sql/bfv_olap.sql index 46d9374980..c1a7a7759e 100644 --- a/src/test/regress/sql/bfv_olap.sql +++ b/src/test/regress/sql/bfv_olap.sql @@ -333,15 +333,32 @@ where g in ( select rank() over (order by x) from generate_series(1,5) x ); +-- +-- Test to check the query plan for a ROLLUP query. +-- +explain (costs off) select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn); +select cn, vn, pn, sum(qty*prc) from sale group by rollup(cn,vn,pn); -- -- This caused a crash in ROLLUP planning at one point. -- +EXPLAIN (costs off) +SELECT sale.vn +FROM sale,vendor +WHERE sale.vn=vendor.vn +GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); + SELECT sale.vn FROM sale,vendor WHERE sale.vn=vendor.vn GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); +EXPLAIN (costs off) +SELECT DISTINCT sale.vn +FROM sale,vendor +WHERE sale.vn=vendor.vn +GROUP BY ROLLUP( (sale.dt,sale.cn),(sale.pn),(sale.vn)); + SELECT DISTINCT sale.vn FROM sale,vendor WHERE sale.vn=vendor.vn --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
