Hi, While developing some improvements for TPC-DS queries I found out that with UNION ALL partial paths are not emitted. Whilst fixing that I also came across the subquery costing which does not seem to consider parallelism when doing the costing.
I added a simplified testcase in pg-regress to show this goes wrong, and attached also a before and after explain output of tpc-ds SF100 query 5 based on version 12.4. I hope I followed all etiquette and these kind of improvements are welcome. Kind regards, Luc Swarm64
From 651999e07735f7dca887b4b672a008620515e857 Mon Sep 17 00:00:00 2001 From: Luc Vlaming <l...@swarm64.com> Date: Tue, 13 Oct 2020 09:35:33 +0200 Subject: [PATCH v1] Allow partial UNION ALL; improve parallel subquery costing --- src/backend/optimizer/path/costsize.c | 11 ++++++ src/backend/optimizer/prep/prepunion.c | 4 +++ src/test/regress/expected/union.out | 50 ++++++++++++++++++++++++++ src/test/regress/sql/union.sql | 35 ++++++++++++++++++ 4 files changed, 100 insertions(+) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index a2a9b1f7be..b05efb8bd5 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1316,6 +1316,17 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, startup_cost += path->path.pathtarget->cost.startup; run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + /* Adjust costing for parallelism, if used. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); + + /* The CPU cost is divided among all the workers. */ + run_cost /= parallel_divisor; + } + path->path.startup_cost += startup_cost; path->path.total_cost += startup_cost + run_cost; } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index cd9d49c1f7..99da4297e6 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -675,6 +675,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, NIL, NULL, parallel_workers, enable_parallel_append, NIL, -1); + + if (op->all && enable_parallel_append) + add_partial_path(result_rel, ppath); + ppath = (Path *) create_gather_path(root, result_rel, ppath, result_rel->reltarget, NULL, NULL); diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 7189f5bd3d..77a7e5e759 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1052,3 +1052,53 @@ where (x = 0) or (q1 >= q2 and q1 <= q2); 4567890123456789 | 4567890123456789 | 1 (6 rows) +-- Test handling of appendrel with different types which disables the path flattening and +-- forces a subquery node. for the subquery node ensure the rowcounts are correct. +create function check_estimated_rows(text) returns table (estimated int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain %s', $1) + loop + tmp := regexp_match(ln, 'rows=(\d*)'); + return query select tmp[1]::int; + end loop; +end; +$$; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +explain (costs off) +select *, 0::int from tenk1 a +union all +select *, 1::bigint from tenk1 b; + QUERY PLAN +------------------------------------------------ + Gather + Workers Planned: 2 + -> Parallel Append + -> Subquery Scan on "*SELECT* 1" + -> Parallel Seq Scan on tenk1 a + -> Parallel Seq Scan on tenk1 b +(6 rows) + +select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;'); + check_estimated_rows +---------------------- + 19990 + + 8330 + 4165 + 4165 + 4165 +(6 rows) + +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset min_parallel_table_scan_size; +drop function check_estimated_rows(text); diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index 5f4881d594..93a2c7b329 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -440,3 +440,38 @@ select * from union all select *, 1 as x from int8_tbl b) ss where (x = 0) or (q1 >= q2 and q1 <= q2); + +-- Test handling of appendrel with different types which disables the path flattening and +-- forces a subquery node. for the subquery node ensure the rowcounts are correct. +create function check_estimated_rows(text) returns table (estimated int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain %s', $1) + loop + tmp := regexp_match(ln, 'rows=(\d*)'); + return query select tmp[1]::int; + end loop; +end; +$$; + +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; + +explain (costs off) +select *, 0::int from tenk1 a +union all +select *, 1::bigint from tenk1 b; + +select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;'); + +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset min_parallel_table_scan_size; +drop function check_estimated_rows(text); -- 2.25.1
Limit (cost=33975298.60..33975298.85 rows=100 width=88) -> Sort (cost=33975298.60..33975304.25 rows=2262 width=88) Sort Key: ('store channel'::text), (('store'::text || (ssr.s_store_id)::text)) -> MixedAggregate (cost=17212822.43..33975212.15 rows=2262 width=88) Hash Key: ('store channel'::text), (('store'::text || (ssr.s_store_id)::text)) Hash Key: ('store channel'::text) Group Key: () -> Append (cost=17212822.43..33974571.14 rows=20613 width=88) -> Subquery Scan on ssr (cost=17212822.43..17213803.25 rows=201 width=88) -> GroupAggregate (cost=17212822.43..17213800.24 rows=201 width=49) Group Key: store.s_store_id -> Sort (cost=17212822.43..17212985.07 rows=65053 width=49) Sort Key: store.s_store_id -> Hash Join (cost=1965.58..17207621.67 rows=65053 width=49) Hash Cond: ("*SELECT* 1".store_sk = store.s_store_sk) -> Hash Join (cost=1943.48..17206705.09 rows=65053 width=36) Hash Cond: ("*SELECT* 1".date_sk = date_dim.d_date_sk) -> Append (cost=0.00..12848065.13 rows=316803342 width=40) -> Subquery Scan on "*SELECT* 1" (cost=0.00..10312104.14 rows=288004992 width=40) -> Gather (cost=0.00..5992029.26 rows=288004992 width=52) Workers Planned: 54 -> Parallel Seq Scan on store_sales (cost=0.00..5992029.26 rows=5333426 width=52) -> Subquery Scan on "*SELECT* 2" (cost=0.00..951944.28 rows=28798350 width=40) -> Gather (cost=0.00..519969.03 rows=28798350 width=52) Workers Planned: 54 -> Parallel Seq Scan on store_returns (cost=0.00..519969.03 rows=533303 width=52) -> Hash (cost=1943.29..1943.29 rows=15 width=4) -> Gather (cost=0.00..1943.29 rows=15 width=4) Workers Planned: 54 -> Parallel Seq Scan on date_dim (cost=0.00..1943.29 rows=1 width=4) Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date)) -> Hash (cost=17.07..17.07 rows=402 width=21) -> Gather (cost=0.00..17.07 rows=402 width=21) Workers Planned: 54 -> Parallel Seq Scan on store (cost=0.00..17.07 rows=7 width=21) -> Subquery Scan on csr (cost=9776975.68..9777973.55 rows=20400 width=88) -> GroupAggregate (cost=9776975.68..9777667.55 rows=20400 width=49) Group Key: catalog_page.cp_catalog_page_id -> Sort (cost=9776975.68..9777056.99 rows=32525 width=49) Sort Key: catalog_page.cp_catalog_page_id -> Hash Join (cost=2652.26..9774538.05 rows=32525 width=49) Hash Cond: ("*SELECT* 1_1".page_sk = catalog_page.cp_catalog_page_sk) -> Hash Join (cost=1943.48..9773382.05 rows=32525 width=36) Hash Cond: ("*SELECT* 1_1".date_sk = date_dim_1.d_date_sk) -> Append (cost=0.00..7593154.66 rows=158396994 width=40) -> Subquery Scan on "*SELECT* 1_1" (cost=0.00..6266326.91 rows=144001728 width=40) -> Gather (cost=0.00..4106300.99 rows=144001728 width=52) Workers Planned: 54 -> Parallel Seq Scan on catalog_sales (cost=0.00..4106300.99 rows=2666699 width=52) -> Subquery Scan on "*SELECT* 2_1" (cost=0.00..534842.78 rows=14395266 width=40) -> Gather (cost=0.00..318913.79 rows=14395266 width=52) Workers Planned: 54 -> Parallel Seq Scan on catalog_returns (cost=0.00..318913.79 rows=266579 width=52) -> Hash (cost=1943.29..1943.29 rows=15 width=4) -> Gather (cost=0.00..1943.29 rows=15 width=4) Workers Planned: 54 -> Parallel Seq Scan on date_dim date_dim_1 (cost=0.00..1943.29 rows=1 width=4) Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date)) -> Hash (cost=453.78..453.78 rows=20400 width=21) -> Gather (cost=0.00..453.78 rows=20400 width=21) Workers Planned: 54 -> Parallel Seq Scan on catalog_page (cost=0.00..453.78 rows=378 width=21) -> Subquery Scan on wsr (cost=6982661.69..6982691.27 rows=12 width=88) -> GroupAggregate (cost=6982661.69..6982691.09 rows=12 width=49) Group Key: web_site.web_site_id -> Sort (cost=6982661.69..6982666.57 rows=1952 width=49) Sort Key: web_site.web_site_id -> Hash Join (cost=1944.78..6982555.00 rows=1952 width=49) Hash Cond: ("*SELECT* 1_2".wsr_web_site_sk = web_site.web_site_sk) -> Hash Join (cost=1943.48..6982473.18 rows=16265 width=36) Hash Cond: ("*SELECT* 1_2".date_sk = date_dim_2.d_date_sk) -> Append (cost=0.00..5891230.26 rows=79209949 width=40) -> Subquery Scan on "*SELECT* 1_2" (cost=0.00..3149874.55 rows=72009168 width=40) -> Gather (cost=0.00..2069737.03 rows=72009168 width=52) Workers Planned: 54 -> Parallel Seq Scan on web_sales (cost=0.00..2069737.03 rows=1333503 width=52) -> Subquery Scan on "*SELECT* 2_2" (cost=2089739.58..2345305.96 rows=7200781 width=40) -> Gather (cost=2089739.58..2237294.24 rows=7200781 width=52) Workers Planned: 54 -> Parallel Hash Left Join (cost=2089739.58..2237294.24 rows=133348 width=52) Hash Cond: ((web_returns.wr_item_sk = web_sales_1.ws_item_sk) AND (web_returns.wr_order_number = web_sales_1.ws_order_number)) -> Parallel Seq Scan on web_returns (cost=0.00..142887.48 rows=133348 width=28) -> Parallel Hash (cost=2069737.03..2069737.03 rows=1333503 width=12) -> Parallel Seq Scan on web_sales web_sales_1 (cost=0.00..2069737.03 rows=1333503 width=12) -> Hash (cost=1943.29..1943.29 rows=15 width=4) -> Gather (cost=0.00..1943.29 rows=15 width=4) Workers Planned: 54 -> Parallel Seq Scan on date_dim date_dim_2 (cost=0.00..1943.29 rows=1 width=4) Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date)) -> Hash (cost=1.00..1.00 rows=24 width=21) -> Gather (cost=0.00..1.00 rows=24 width=21) Workers Planned: 54 -> Parallel Seq Scan on web_site (cost=0.00..1.00 rows=1 width=21)
Limit (cost=15600476.15..15600476.40 rows=100 width=88) -> Sort (cost=15600476.15..15600481.81 rows=2262 width=88) Sort Key: ('store channel'::text), (('store'::text || (ssr.s_store_id)::text)) -> MixedAggregate (cost=6712029.63..15600389.70 rows=2262 width=88) Hash Key: ('store channel'::text), (('store'::text || (ssr.s_store_id)::text)) Hash Key: ('store channel'::text) Group Key: () -> Append (cost=6712029.63..15599748.69 rows=20613 width=88) -> Subquery Scan on ssr (cost=6712029.63..6712531.30 rows=201 width=88) -> Finalize GroupAggregate (cost=6712029.63..6712528.29 rows=201 width=49) Group Key: store.s_store_id -> Gather Merge (cost=6712029.63..6712390.60 rows=10854 width=49) Workers Planned: 54 -> Partial GroupAggregate (cost=6712028.04..6712048.12 rows=201 width=49) Group Key: store.s_store_id -> Sort (cost=6712028.04..6712031.05 rows=1205 width=49) Sort Key: store.s_store_id -> Parallel Hash Join (cost=1960.47..6711966.37 rows=1205 width=49) Hash Cond: ("*SELECT* 1".store_sk = store.s_store_sk) -> Parallel Hash Join (cost=1943.30..6711944.48 rows=1205 width=36) Hash Cond: ("*SELECT* 1".date_sk = date_dim.d_date_sk) -> Parallel Append (cost=0.00..6629332.86 rows=5866729 width=40) -> Subquery Scan on "*SELECT* 1" (cost=0.00..6072030.64 rows=5333426 width=40) -> Parallel Seq Scan on store_sales (cost=0.00..5992029.26 rows=5333426 width=52) -> Subquery Scan on "*SELECT* 2" (cost=0.00..527968.57 rows=533303 width=40) -> Parallel Seq Scan on store_returns (cost=0.00..519969.03 rows=533303 width=52) -> Parallel Hash (cost=1943.29..1943.29 rows=1 width=4) -> Parallel Seq Scan on date_dim (cost=0.00..1943.29 rows=1 width=4) Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date)) -> Parallel Hash (cost=17.07..17.07 rows=7 width=21) -> Parallel Seq Scan on store (cost=0.00..17.07 rows=7 width=21) -> Subquery Scan on csr (cost=4526646.86..4528599.23 rows=20400 width=88) -> Finalize GroupAggregate (cost=4526646.86..4528293.23 rows=20400 width=49) Group Key: catalog_page.cp_catalog_page_id -> Gather Merge (cost=4526646.86..4527682.88 rows=32508 width=49) Workers Planned: 54 -> Partial GroupAggregate (cost=4526645.27..4526660.32 rows=602 width=49) Group Key: catalog_page.cp_catalog_page_id -> Sort (cost=4526645.27..4526646.77 rows=602 width=49) Sort Key: catalog_page.cp_catalog_page_id -> Parallel Hash Join (cost=2401.81..4526617.48 rows=602 width=49) Hash Cond: ("*SELECT* 1_1".page_sk = catalog_page.cp_catalog_page_sk) -> Parallel Hash Join (cost=1943.30..4526156.60 rows=602 width=36) Hash Cond: ("*SELECT* 1_1".date_sk = date_dim_1.d_date_sk) -> Parallel Append (cost=0.00..4483880.33 rows=2933278 width=40) -> Subquery Scan on "*SELECT* 1_1" (cost=0.00..4146301.47 rows=2666699 width=40) -> Parallel Seq Scan on catalog_sales (cost=0.00..4106300.99 rows=2666699 width=52) -> Subquery Scan on "*SELECT* 2_1" (cost=0.00..322912.47 rows=266579 width=40) -> Parallel Seq Scan on catalog_returns (cost=0.00..318913.79 rows=266579 width=52) -> Parallel Hash (cost=1943.29..1943.29 rows=1 width=4) -> Parallel Seq Scan on date_dim date_dim_1 (cost=0.00..1943.29 rows=1 width=4) Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date)) -> Parallel Hash (cost=453.78..453.78 rows=378 width=21) -> Parallel Seq Scan on catalog_page (cost=0.00..453.78 rows=378 width=21) -> Subquery Scan on wsr (cost=4358485.68..4358515.10 rows=12 width=88) -> Finalize GroupAggregate (cost=4358485.68..4358514.92 rows=12 width=49) Group Key: web_site.web_site_id -> Gather Merge (cost=4358485.68..4358506.70 rows=648 width=49) Workers Planned: 54 -> Partial GroupAggregate (cost=4358484.09..4358484.75 rows=12 width=49) Group Key: web_site.web_site_id -> Sort (cost=4358484.09..4358484.18 rows=36 width=49) Sort Key: web_site.web_site_id -> Parallel Hash Join (cost=1944.32..4358483.16 rows=36 width=49) Hash Cond: ("*SELECT* 2_2".wsr_web_site_sk = web_site.web_site_sk) -> Parallel Hash Join (cost=1943.30..4358481.00 rows=301 width=36) Hash Cond: ("*SELECT* 2_2".date_sk = date_dim_2.d_date_sk) -> Parallel Append (cost=0.00..4336368.29 rows=1466851 width=40) -> Subquery Scan on "*SELECT* 2_2" (cost=2089739.58..2239294.46 rows=133348 width=40) -> Parallel Hash Left Join (cost=2089739.58..2237294.24 rows=133348 width=52) Hash Cond: ((web_returns.wr_item_sk = web_sales.ws_item_sk) AND (web_returns.wr_order_number = web_sales.ws_order_number)) -> Parallel Seq Scan on web_returns (cost=0.00..142887.48 rows=133348 width=28) -> Parallel Hash (cost=2069737.03..2069737.03 rows=1333503 width=12) -> Parallel Seq Scan on web_sales (cost=0.00..2069737.03 rows=1333503 width=12) -> Subquery Scan on "*SELECT* 1_2" (cost=0.00..2089739.58 rows=1333503 width=40) -> Parallel Seq Scan on web_sales web_sales_1 (cost=0.00..2069737.03 rows=1333503 width=52) -> Parallel Hash (cost=1943.29..1943.29 rows=1 width=4) -> Parallel Seq Scan on date_dim date_dim_2 (cost=0.00..1943.29 rows=1 width=4) Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date)) -> Parallel Hash (cost=1.00..1.00 rows=1 width=21) -> Parallel Seq Scan on web_site (cost=0.00..1.00 rows=1 width=21)