On Thu, 18 Aug 2022 at 02:46, Tomas Vondra <tomas.von...@enterprisedb.com> wrote: > So I don't think the current costing is wrong, but it certainly is more > complex. But the test does not test what it intended - I have two ideas > how to make it work: > > 1) increase the number of rows in the table > > 2) increase cpu_operator_cost (for that one test?) > > 3) tweak the costing somehow, to increase the cost a bit
Why not, 4) SET parallel_setup_cost = 0; there are plenty of other places we do just that so we get a parallel plan without having to generate enough cost to drown out the parallel worker startup cost. Here are a couple of patches to demo the idea. David
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index 4b41ccf1aa..db36e3a150 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -942,6 +942,7 @@ INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM gen ANALYZE pagg_tab_ml; -- For Parallel Append SET max_parallel_workers_per_gather TO 2; +SET parallel_setup_cost = 0; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, but still we do not see a partial aggregation as array_agg() @@ -1025,6 +1026,7 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 (25 rows) +RESET parallel_setup_cost; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, thus we will have a partial aggregation for them. diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index c17294b15b..ab070fee24 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -222,6 +222,7 @@ ANALYZE pagg_tab_ml; -- For Parallel Append SET max_parallel_workers_per_gather TO 2; +SET parallel_setup_cost = 0; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with @@ -235,6 +236,8 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; +RESET parallel_setup_cost; + -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, thus we will have a partial aggregation for them.
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index a08a3825ff..0dc6d63347 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -942,40 +942,43 @@ INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM gen ANALYZE pagg_tab_ml; -- For Parallel Append SET max_parallel_workers_per_gather TO 2; +SET parallel_setup_cost = 0; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, but still we do not see a partial aggregation as array_agg() -- is not partial agg safe. EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; - QUERY PLAN --------------------------------------------------------------------------------------- - Sort - Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) - -> Append - -> GroupAggregate - Group Key: pagg_tab_ml.a - Filter: (avg(pagg_tab_ml.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml.a - -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> GroupAggregate - Group Key: pagg_tab_ml_2.a - Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_2.a - -> Append - -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 - -> GroupAggregate - Group Key: pagg_tab_ml_5.a - Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_5.a - -> Append - -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 -(25 rows) + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 +(27 rows) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | array_agg | count @@ -994,33 +997,36 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA -- Without ORDER BY clause, to test Gather at top-most path EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; - QUERY PLAN ---------------------------------------------------------------------- - Append - -> GroupAggregate - Group Key: pagg_tab_ml.a - Filter: (avg(pagg_tab_ml.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml.a - -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> GroupAggregate - Group Key: pagg_tab_ml_2.a - Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_2.a - -> Append - -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 - -> GroupAggregate - Group Key: pagg_tab_ml_5.a - Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_5.a - -> Append - -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 -(23 rows) + QUERY PLAN +--------------------------------------------------------------------------- + Gather + Workers Planned: 2 + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 +(25 rows) +RESET parallel_setup_cost; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, thus we will have a partial aggregation for them. diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql index c17294b15b..ab070fee24 100644 --- a/src/test/regress/sql/partition_aggregate.sql +++ b/src/test/regress/sql/partition_aggregate.sql @@ -222,6 +222,7 @@ ANALYZE pagg_tab_ml; -- For Parallel Append SET max_parallel_workers_per_gather TO 2; +SET parallel_setup_cost = 0; -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with @@ -235,6 +236,8 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; +RESET parallel_setup_cost; + -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with -- PARTITION KEY, thus we will have a partial aggregation for them.