Re: allow partial union-all and improve parallel subquery costing
With the thread stalled and requests for a test (documentation really?) not responded to I'm marking this patch Returned with Feedback. -- Daniel Gustafsson https://vmware.com/
Re: allow partial union-all and improve parallel subquery costing
Le lundi 12 avril 2021, 14:01:36 CEST Luc Vlaming a écrit : > Here's an improved and rebased patch. Hope the description helps some > people. I will resubmit it to the next commitfest. > Hello Luc, I've taken a look at this patch, and while I don't fully understand its implications here are a couple remarks. I think you should add a test demonstrating the use of the new partial append path you add, for example using your base query: explain (costs off) select sum(two) from ( select *, 1::int from tenk1 a union all select *, 1::bigint from tenk1 b ) t ; I'm not sure I understand why the subquery scan rows estimate has not been accounted like you propose before, because the way it's done as of now basically doubles the estimate for the subqueryscan, since we account for it already being divided by it's number of workers, as mentioned in cost_append: /* * Apply parallel divisor to subpaths. Scale the number of rows * for each partial subpath based on the ratio of the parallel * divisor originally used for the subpath to the one we adopted. * Also add the cost of partial paths to the total cost, but * ignore non-partial paths for now. */ Do we have other nodes for which we make this assumption ? Also, adding a partial path comprised only of underlying partial paths might not be enough: maybe we should add one partial path even in the case of mixed partial / nonpartial paths like it's done in add_paths_to_append_rel ? Regards, -- Ronan Dunklau
Re: allow partial union-all and improve parallel subquery costing
Hi David, On 15-03-2021 14:09, David Steele wrote: Hi Luc, On 12/30/20 8:54 AM, Luc Vlaming wrote: Created a commitfest entry assuming this is the right thing to do so that someone can potentially pick it up during the commitfest. Providing an updated patch based on latest master. Looks like you need another rebase: http://cfbot.cputube.org/patch_32_2787.log. Marked as Waiting for Author. You may also want to give a more detailed description of what you have done here and why it improves execution plans. This may help draw some reviewers. Regards, Here's an improved and rebased patch. Hope the description helps some people. I will resubmit it to the next commitfest. Regards, Luc >From e918e7cf8c9fe628c7daba2ccf37ad767691e4c7 Mon Sep 17 00:00:00 2001 From: Luc Vlaming Date: Mon, 12 Apr 2021 09:55:30 +0200 Subject: [PATCH v4] Add explicit partial UNION ALL path and improve parallel subquery rowcounts and costing. By adding the partial union-all path we get parallel plans whenever the flatten_simple_union_all cannot be applied, e.g. whenever the column types do not exactly match. A simple testcase shows this in the regression tests. Also for e.g. tpc-ds query 5 we now get a more parallel plan for the part that processes the csr CTE. To make it more likely that the improved path is chosen another small fix is added which corrects the rowcounts when subquery nodes are used in parallel plans. --- src/backend/optimizer/path/costsize.c | 11 src/backend/optimizer/prep/prepunion.c| 4 ++ .../regress/expected/incremental_sort.out | 10 ++-- src/test/regress/expected/union.out | 52 +++ src/test/regress/sql/union.sql| 37 + 5 files changed, 108 insertions(+), 6 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 8577c7b138..1da6879c6d 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1426,6 +1426,17 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, startup_cost += path->path.pathtarget->cost.startup; run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + /* Adjust costing for parallelism, if used. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); + + /* The CPU cost is divided among all the workers. */ + run_cost /= parallel_divisor; + } + path->path.startup_cost += startup_cost; path->path.total_cost += startup_cost + run_cost; } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 037dfaacfd..7d4a6a19c2 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -679,6 +679,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, NIL, NULL, parallel_workers, enable_parallel_append, -1); + + if (op->all && enable_parallel_append) + add_partial_path(result_rel, ppath); + ppath = (Path *) create_gather_path(root, result_rel, ppath, result_rel->reltarget, NULL, NULL); diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index a417b566d9..a0a31ba053 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1487,14 +1487,12 @@ explain (costs off) select * from t union select * from t order by 1,3; -> Unique -> Sort Sort Key: t.a, t.b, t.c - -> Append - -> Gather - Workers Planned: 2 + -> Gather + Workers Planned: 2 + -> Parallel Append -> Parallel Seq Scan on t - -> Gather - Workers Planned: 2 -> Parallel Seq Scan on t t_1 -(13 rows) +(11 rows) -- Full sort, not just incremental sort can be pushed below a gather merge path -- by generate_useful_gather_paths. diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 75f78db8f5..cf7660f524 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1420,3 +1420,55 @@ where (x = 0) or (q1 >= q2 and q1 <= q2); 4567890123456789 | 4567890123456789 | 1 (6 rows) +-- Test handling of appendrel with different types which disables the path flattening and +-- forces a subquery node. for the subquery node ensure the rowcounts are correct. +create function check_estimated_rows(text) returns table (estimated int) +language plpgsql as +$$ +declare +ln text; +tmp text[]; +first_row bool := true; +begin +for ln in +execute format('explain %s', $1) +loop +tmp := regexp_ma
Re: allow partial union-all and improve parallel subquery costing
On 3/15/21 9:09 AM, David Steele wrote: On 12/30/20 8:54 AM, Luc Vlaming wrote: Created a commitfest entry assuming this is the right thing to do so that someone can potentially pick it up during the commitfest. Providing an updated patch based on latest master. Looks like you need another rebase: http://cfbot.cputube.org/patch_32_2787.log. Marked as Waiting for Author. You may also want to give a more detailed description of what you have done here and why it improves execution plans. This may help draw some reviewers. Since no new patch has been provided, marking this Returned with Feedback. Please resubmit to the next CF when you have a new patch. Regards, -- -David da...@pgmasters.net
Re: allow partial union-all and improve parallel subquery costing
Hi Luc, On 12/30/20 8:54 AM, Luc Vlaming wrote: Created a commitfest entry assuming this is the right thing to do so that someone can potentially pick it up during the commitfest. Providing an updated patch based on latest master. Looks like you need another rebase: http://cfbot.cputube.org/patch_32_2787.log. Marked as Waiting for Author. You may also want to give a more detailed description of what you have done here and why it improves execution plans. This may help draw some reviewers. Regards, -- -David da...@pgmasters.net
Re: allow partial union-all and improve parallel subquery costing
On 23-10-2020 07:51, Luc Vlaming wrote: On 14.10.20 09:38, Luc Vlaming wrote: Hi, It seems I ran the wrong make checks to verify everything is correct (make check instead of make installcheck-world) and this uncovered another regress test change. I also noticed the statistics are sometimes giving different row count results so I increased the row statistics target to make sure the regress output is stable. Updated patch attached which now successfully runs installcheck-world for v13 and master. Kind regards, Luc From: Luc Vlaming Sent: Tuesday, October 13, 2020 10:57 AM To: pgsql-hackers Subject: allow partial union-all and improve parallel subquery costing Hi, While developing some improvements for TPC-DS queries I found out that with UNION ALL partial paths are not emitted. Whilst fixing that I also came across the subquery costing which does not seem to consider parallelism when doing the costing. I added a simplified testcase in pg-regress to show this goes wrong, and attached also a before and after explain output of tpc-ds SF100 query 5 based on version 12.4. I hope I followed all etiquette and these kind of improvements are welcome. Kind regards, Luc Swarm64 Hi, Created a commitfest entry assuming this is the right thing to do so that someone can potentially pick it up during the commitfest. Kind regards, Luc Swarm64 Hi, Providing an updated patch based on latest master. Cheers, Luc >From 032c48b51ee0d436c91d9db81ce800d91168bd01 Mon Sep 17 00:00:00 2001 From: Luc Vlaming Date: Wed, 30 Dec 2020 14:49:48 +0100 Subject: [PATCH v3] Allow partial UNION ALL; improve parallel subquery costing --- src/backend/optimizer/path/costsize.c | 11 src/backend/optimizer/prep/prepunion.c| 4 ++ .../regress/expected/incremental_sort.out | 10 ++-- src/test/regress/expected/union.out | 52 +++ src/test/regress/sql/union.sql| 37 + 5 files changed, 108 insertions(+), 6 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 22d6935824..1c3b04c4d7 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1328,6 +1328,17 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, startup_cost += path->path.pathtarget->cost.startup; run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + /* Adjust costing for parallelism, if used. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); + + /* The CPU cost is divided among all the workers. */ + run_cost /= parallel_divisor; + } + path->path.startup_cost += startup_cost; path->path.total_cost += startup_cost + run_cost; } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 745f443e5c..4001eb87f3 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -678,6 +678,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, NIL, NULL, parallel_workers, enable_parallel_append, NIL, -1); + + if (op->all && enable_parallel_append) + add_partial_path(result_rel, ppath); + ppath = (Path *) create_gather_path(root, result_rel, ppath, result_rel->reltarget, NULL, NULL); diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index a8cbfd9f5f..40265674c4 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1459,14 +1459,12 @@ explain (costs off) select * from t union select * from t order by 1,3; -> Unique -> Sort Sort Key: t.a, t.b, t.c - -> Append - -> Gather - Workers Planned: 2 + -> Gather + Workers Planned: 2 + -> Parallel Append -> Parallel Seq Scan on t - -> Gather - Workers Planned: 2 -> Parallel Seq Scan on t t_1 -(13 rows) +(11 rows) -- Full sort, not just incremental sort can be pushed below a gather merge path -- by generate_useful_gather_paths. diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 75f78db8f5..cf7660f524 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1420,3 +1420,55 @@ where (x = 0) or (q1 >= q2 and q1 <= q2); 4567890123456789 | 4567890123456789 | 1 (6 rows) +-- Test handling of appendrel with different types which disables the path
Re: allow partial union-all and improve parallel subquery costing
On 14.10.20 09:38, Luc Vlaming wrote: Hi, It seems I ran the wrong make checks to verify everything is correct (make check instead of make installcheck-world) and this uncovered another regress test change. I also noticed the statistics are sometimes giving different row count results so I increased the row statistics target to make sure the regress output is stable. Updated patch attached which now successfully runs installcheck-world for v13 and master. Kind regards, Luc From: Luc Vlaming Sent: Tuesday, October 13, 2020 10:57 AM To: pgsql-hackers Subject: allow partial union-all and improve parallel subquery costing Hi, While developing some improvements for TPC-DS queries I found out that with UNION ALL partial paths are not emitted. Whilst fixing that I also came across the subquery costing which does not seem to consider parallelism when doing the costing. I added a simplified testcase in pg-regress to show this goes wrong, and attached also a before and after explain output of tpc-ds SF100 query 5 based on version 12.4. I hope I followed all etiquette and these kind of improvements are welcome. Kind regards, Luc Swarm64 Hi, Created a commitfest entry assuming this is the right thing to do so that someone can potentially pick it up during the commitfest. Kind regards, Luc Swarm64
Re: allow partial union-all and improve parallel subquery costing
Hi, It seems I ran the wrong make checks to verify everything is correct (make check instead of make installcheck-world) and this uncovered another regress test change. I also noticed the statistics are sometimes giving different row count results so I increased the row statistics target to make sure the regress output is stable. Updated patch attached which now successfully runs installcheck-world for v13 and master. Kind regards, Luc From: Luc Vlaming Sent: Tuesday, October 13, 2020 10:57 AM To: pgsql-hackers Subject: allow partial union-all and improve parallel subquery costing Hi, While developing some improvements for TPC-DS queries I found out that with UNION ALL partial paths are not emitted. Whilst fixing that I also came across the subquery costing which does not seem to consider parallelism when doing the costing. I added a simplified testcase in pg-regress to show this goes wrong, and attached also a before and after explain output of tpc-ds SF100 query 5 based on version 12.4. I hope I followed all etiquette and these kind of improvements are welcome. Kind regards, Luc Swarm64 From 622903bff2108cb15d8fcf92b327f5ef00d41daa Mon Sep 17 00:00:00 2001 From: Luc Vlaming Date: Wed, 14 Oct 2020 09:22:46 +0200 Subject: [PATCH v2] Allow partial UNION ALL; improve parallel subquery costing --- src/backend/optimizer/path/costsize.c | 11 src/backend/optimizer/prep/prepunion.c| 4 ++ .../regress/expected/incremental_sort.out | 10 ++-- src/test/regress/expected/union.out | 52 +++ src/test/regress/sql/union.sql| 37 + 5 files changed, 108 insertions(+), 6 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index f39e6a9f80..c2018b3401 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1318,6 +1318,17 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, startup_cost += path->path.pathtarget->cost.startup; run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + /* Adjust costing for parallelism, if used. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); + + /* The CPU cost is divided among all the workers. */ + run_cost /= parallel_divisor; + } + path->path.startup_cost += startup_cost; path->path.total_cost += startup_cost + run_cost; } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 745f443e5c..4001eb87f3 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -678,6 +678,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, NIL, NULL, parallel_workers, enable_parallel_append, NIL, -1); + + if (op->all && enable_parallel_append) + add_partial_path(result_rel, ppath); + ppath = (Path *) create_gather_path(root, result_rel, ppath, result_rel->reltarget, NULL, NULL); diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index e376ea1276..c9596b8e12 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1459,13 +1459,11 @@ explain (costs off) select * from t union select * from t order by 1,3; -> Unique -> Sort Sort Key: t.a, t.b, t.c - -> Append - -> Gather - Workers Planned: 2 + -> Gather + Workers Planned: 2 + -> Parallel Append -> Parallel Seq Scan on t - -> Gather - Workers Planned: 2 -> Parallel Seq Scan on t t_1 -(13 rows) +(11 rows) drop table t; diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 6e72e92d80..cd553c9c0c 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1052,3 +1052,55 @@ where (x = 0) or (q1 >= q2 and q1 <= q2); 4567890123456789 | 4567890123456789 | 1 (6 rows) +-- Test handling of appendrel with different types which disables the path flattening and +-- forces a subquery node. for the subquery node ensure the rowcounts are correct. +create function check_estimated_rows(text) returns table (estimated int) +language plpgsql as +$$ +declare +ln text; +tmp text[]; +first_row bool := true; +begin +for ln in +execute format('explain %s', $1) +loop +tmp := regexp_match(ln, 'rows=(\d*)'); +return query select tmp[1
allow partial union-all and improve parallel subquery costing
Hi, While developing some improvements for TPC-DS queries I found out that with UNION ALL partial paths are not emitted. Whilst fixing that I also came across the subquery costing which does not seem to consider parallelism when doing the costing. I added a simplified testcase in pg-regress to show this goes wrong, and attached also a before and after explain output of tpc-ds SF100 query 5 based on version 12.4. I hope I followed all etiquette and these kind of improvements are welcome. Kind regards, Luc Swarm64 From 651999e07735f7dca887b4b672a008620515e857 Mon Sep 17 00:00:00 2001 From: Luc Vlaming Date: Tue, 13 Oct 2020 09:35:33 +0200 Subject: [PATCH v1] Allow partial UNION ALL; improve parallel subquery costing --- src/backend/optimizer/path/costsize.c | 11 ++ src/backend/optimizer/prep/prepunion.c | 4 +++ src/test/regress/expected/union.out| 50 ++ src/test/regress/sql/union.sql | 35 ++ 4 files changed, 100 insertions(+) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index a2a9b1f7be..b05efb8bd5 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1316,6 +1316,17 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, startup_cost += path->path.pathtarget->cost.startup; run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + /* Adjust costing for parallelism, if used. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); + + /* The CPU cost is divided among all the workers. */ + run_cost /= parallel_divisor; + } + path->path.startup_cost += startup_cost; path->path.total_cost += startup_cost + run_cost; } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index cd9d49c1f7..99da4297e6 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -675,6 +675,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, NIL, NULL, parallel_workers, enable_parallel_append, NIL, -1); + + if (op->all && enable_parallel_append) + add_partial_path(result_rel, ppath); + ppath = (Path *) create_gather_path(root, result_rel, ppath, result_rel->reltarget, NULL, NULL); diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 7189f5bd3d..77a7e5e759 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1052,3 +1052,53 @@ where (x = 0) or (q1 >= q2 and q1 <= q2); 4567890123456789 | 4567890123456789 | 1 (6 rows) +-- Test handling of appendrel with different types which disables the path flattening and +-- forces a subquery node. for the subquery node ensure the rowcounts are correct. +create function check_estimated_rows(text) returns table (estimated int) +language plpgsql as +$$ +declare +ln text; +tmp text[]; +first_row bool := true; +begin +for ln in +execute format('explain %s', $1) +loop +tmp := regexp_match(ln, 'rows=(\d*)'); +return query select tmp[1]::int; +end loop; +end; +$$; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +explain (costs off) +select *, 0::int from tenk1 a +union all +select *, 1::bigint from tenk1 b; + QUERY PLAN + + Gather + Workers Planned: 2 + -> Parallel Append + -> Subquery Scan on "*SELECT* 1" + -> Parallel Seq Scan on tenk1 a + -> Parallel Seq Scan on tenk1 b +(6 rows) + +select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;'); + check_estimated_rows +-- +19990 + + 8330 + 4165 + 4165 + 4165 +(6 rows) + +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset min_parallel_table_scan_size; +drop function check_estimated_rows(text); diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index 5f4881d594..93a2c7b329 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -440,3 +440,38 @@ select * from union all select *, 1 as x from int8_tbl b) ss where (x = 0) or (q1 >= q2 and q1 <= q2); + +-- Test handling of appendrel with different types which disables the path flattening and +-- forces a subquery node. for the subquery node ensure the rowcounts are correct. +create function check_estimated_rows(text) returns table (estimated int) +language plpgsql as +$$ +declare +ln text; +tmp text[]; +first_row bool := true; +begin +for ln in +execute format('explain %s', $1) +lo