On Thu, 18 Aug 2022 at 02:46, Tomas Vondra
<[email protected]> wrote:
> So I don't think the current costing is wrong, but it certainly is more
> complex. But the test does not test what it intended - I have two ideas
> how to make it work:
>
> 1) increase the number of rows in the table
>
> 2) increase cpu_operator_cost (for that one test?)
>
> 3) tweak the costing somehow, to increase the cost a bit
Why not, 4) SET parallel_setup_cost = 0; there are plenty of other
places we do just that so we get a parallel plan without having to
generate enough cost to drown out the parallel worker startup cost.
Here are a couple of patches to demo the idea.
David
diff --git a/src/test/regress/expected/partition_aggregate.out
b/src/test/regress/expected/partition_aggregate.out
index 4b41ccf1aa..db36e3a150 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -942,6 +942,7 @@ INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i %
4, 'FM0000') FROM gen
ANALYZE pagg_tab_ml;
-- For Parallel Append
SET max_parallel_workers_per_gather TO 2;
+SET parallel_setup_cost = 0;
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
-- PARTITION KEY, but still we do not see a partial aggregation as array_agg()
@@ -1025,6 +1026,7 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM
pagg_tab_ml GROUP BY a HA
-> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
(25 rows)
+RESET parallel_setup_cost;
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
-- PARTITION KEY, thus we will have a partial aggregation for them.
diff --git a/src/test/regress/sql/partition_aggregate.sql
b/src/test/regress/sql/partition_aggregate.sql
index c17294b15b..ab070fee24 100644
--- a/src/test/regress/sql/partition_aggregate.sql
+++ b/src/test/regress/sql/partition_aggregate.sql
@@ -222,6 +222,7 @@ ANALYZE pagg_tab_ml;
-- For Parallel Append
SET max_parallel_workers_per_gather TO 2;
+SET parallel_setup_cost = 0;
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -235,6 +236,8 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM
pagg_tab_ml GROUP BY a HA
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a
HAVING avg(b) < 3;
+RESET parallel_setup_cost;
+
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
-- PARTITION KEY, thus we will have a partial aggregation for them.
diff --git a/src/test/regress/expected/partition_aggregate.out
b/src/test/regress/expected/partition_aggregate.out
index a08a3825ff..0dc6d63347 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -942,40 +942,43 @@ INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i
% 4, 'FM0000') FROM gen
ANALYZE pagg_tab_ml;
-- For Parallel Append
SET max_parallel_workers_per_gather TO 2;
+SET parallel_setup_cost = 0;
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
-- PARTITION KEY, but still we do not see a partial aggregation as array_agg()
-- is not partial agg safe.
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a
HAVING avg(b) < 3 ORDER BY 1, 2, 3;
- QUERY PLAN
---------------------------------------------------------------------------------------
- Sort
- Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT
pagg_tab_ml.c))
- -> Append
- -> GroupAggregate
- Group Key: pagg_tab_ml.a
- Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml.a
- -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
- -> GroupAggregate
- Group Key: pagg_tab_ml_2.a
- Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml_2.a
- -> Append
- -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
- -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
- -> GroupAggregate
- Group Key: pagg_tab_ml_5.a
- Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml_5.a
- -> Append
- -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
- -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(25 rows)
+ QUERY PLAN
+--------------------------------------------------------------------------------------------
+ Gather Merge
+ Workers Planned: 2
+ -> Sort
+ Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT
pagg_tab_ml.c))
+ -> Parallel Append
+ -> GroupAggregate
+ Group Key: pagg_tab_ml.a
+ Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml.a
+ -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+ -> GroupAggregate
+ Group Key: pagg_tab_ml_5.a
+ Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml_5.a
+ -> Append
+ -> Seq Scan on pagg_tab_ml_p3_s1
pagg_tab_ml_5
+ -> Seq Scan on pagg_tab_ml_p3_s2
pagg_tab_ml_6
+ -> GroupAggregate
+ Group Key: pagg_tab_ml_2.a
+ Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml_2.a
+ -> Append
+ -> Seq Scan on pagg_tab_ml_p2_s1
pagg_tab_ml_2
+ -> Seq Scan on pagg_tab_ml_p2_s2
pagg_tab_ml_3
+(27 rows)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a
HAVING avg(b) < 3 ORDER BY 1, 2, 3;
a | sum | array_agg | count
@@ -994,33 +997,36 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM
pagg_tab_ml GROUP BY a HA
-- Without ORDER BY clause, to test Gather at top-most path
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a
HAVING avg(b) < 3;
- QUERY PLAN
----------------------------------------------------------------------
- Append
- -> GroupAggregate
- Group Key: pagg_tab_ml.a
- Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml.a
- -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
- -> GroupAggregate
- Group Key: pagg_tab_ml_2.a
- Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml_2.a
- -> Append
- -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
- -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
- -> GroupAggregate
- Group Key: pagg_tab_ml_5.a
- Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
- -> Sort
- Sort Key: pagg_tab_ml_5.a
- -> Append
- -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
- -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(23 rows)
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Gather
+ Workers Planned: 2
+ -> Parallel Append
+ -> GroupAggregate
+ Group Key: pagg_tab_ml.a
+ Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml.a
+ -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+ -> GroupAggregate
+ Group Key: pagg_tab_ml_5.a
+ Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml_5.a
+ -> Append
+ -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+ -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+ -> GroupAggregate
+ Group Key: pagg_tab_ml_2.a
+ Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+ -> Sort
+ Sort Key: pagg_tab_ml_2.a
+ -> Append
+ -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+ -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(25 rows)
+RESET parallel_setup_cost;
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
-- PARTITION KEY, thus we will have a partial aggregation for them.
diff --git a/src/test/regress/sql/partition_aggregate.sql
b/src/test/regress/sql/partition_aggregate.sql
index c17294b15b..ab070fee24 100644
--- a/src/test/regress/sql/partition_aggregate.sql
+++ b/src/test/regress/sql/partition_aggregate.sql
@@ -222,6 +222,7 @@ ANALYZE pagg_tab_ml;
-- For Parallel Append
SET max_parallel_workers_per_gather TO 2;
+SET parallel_setup_cost = 0;
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
@@ -235,6 +236,8 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM
pagg_tab_ml GROUP BY a HA
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a
HAVING avg(b) < 3;
+RESET parallel_setup_cost;
+
-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
-- for level 1 only. For subpartitions, GROUP BY clause does not match with
-- PARTITION KEY, thus we will have a partial aggregation for them.