Hi,
While developing some improvements for TPC-DS queries I found out that with
UNION ALL partial paths are not emitted. Whilst fixing that I also came across
the subquery costing which does not seem to consider parallelism when doing
the costing.
I added a simplified testcase in pg-regress to show this goes wrong, and
attached also a before and after explain output of tpc-ds SF100 query 5
based on version 12.4.
I hope I followed all etiquette and these kind of improvements are welcome.
Kind regards,
Luc
Swarm64
From 651999e07735f7dca887b4b672a008620515e857 Mon Sep 17 00:00:00 2001
From: Luc Vlaming <[email protected]>
Date: Tue, 13 Oct 2020 09:35:33 +0200
Subject: [PATCH v1] Allow partial UNION ALL; improve parallel subquery costing
---
src/backend/optimizer/path/costsize.c | 11 ++++++
src/backend/optimizer/prep/prepunion.c | 4 +++
src/test/regress/expected/union.out | 50 ++++++++++++++++++++++++++
src/test/regress/sql/union.sql | 35 ++++++++++++++++++
4 files changed, 100 insertions(+)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a2a9b1f7be..b05efb8bd5 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1316,6 +1316,17 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
startup_cost += path->path.pathtarget->cost.startup;
run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
+ /* Adjust costing for parallelism, if used. */
+ if (path->path.parallel_workers > 0)
+ {
+ double parallel_divisor = get_parallel_divisor(&path->path);
+
+ path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
+
+ /* The CPU cost is divided among all the workers. */
+ run_cost /= parallel_divisor;
+ }
+
path->path.startup_cost += startup_cost;
path->path.total_cost += startup_cost + run_cost;
}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index cd9d49c1f7..99da4297e6 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -675,6 +675,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
NIL, NULL,
parallel_workers, enable_parallel_append,
NIL, -1);
+
+ if (op->all && enable_parallel_append)
+ add_partial_path(result_rel, ppath);
+
ppath = (Path *)
create_gather_path(root, result_rel, ppath,
result_rel->reltarget, NULL, NULL);
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 7189f5bd3d..77a7e5e759 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1052,3 +1052,53 @@ where (x = 0) or (q1 >= q2 and q1 <= q2);
4567890123456789 | 4567890123456789 | 1
(6 rows)
+-- Test handling of appendrel with different types which disables the path flattening and
+-- forces a subquery node. for the subquery node ensure the rowcounts are correct.
+create function check_estimated_rows(text) returns table (estimated int)
+language plpgsql as
+$$
+declare
+ ln text;
+ tmp text[];
+ first_row bool := true;
+begin
+ for ln in
+ execute format('explain %s', $1)
+ loop
+ tmp := regexp_match(ln, 'rows=(\d*)');
+ return query select tmp[1]::int;
+ end loop;
+end;
+$$;
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+explain (costs off)
+select *, 0::int from tenk1 a
+union all
+select *, 1::bigint from tenk1 b;
+ QUERY PLAN
+------------------------------------------------
+ Gather
+ Workers Planned: 2
+ -> Parallel Append
+ -> Subquery Scan on "*SELECT* 1"
+ -> Parallel Seq Scan on tenk1 a
+ -> Parallel Seq Scan on tenk1 b
+(6 rows)
+
+select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;');
+ check_estimated_rows
+----------------------
+ 19990
+
+ 8330
+ 4165
+ 4165
+ 4165
+(6 rows)
+
+reset parallel_setup_cost;
+reset parallel_tuple_cost;
+reset min_parallel_table_scan_size;
+drop function check_estimated_rows(text);
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 5f4881d594..93a2c7b329 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -440,3 +440,38 @@ select * from
union all
select *, 1 as x from int8_tbl b) ss
where (x = 0) or (q1 >= q2 and q1 <= q2);
+
+-- Test handling of appendrel with different types which disables the path flattening and
+-- forces a subquery node. for the subquery node ensure the rowcounts are correct.
+create function check_estimated_rows(text) returns table (estimated int)
+language plpgsql as
+$$
+declare
+ ln text;
+ tmp text[];
+ first_row bool := true;
+begin
+ for ln in
+ execute format('explain %s', $1)
+ loop
+ tmp := regexp_match(ln, 'rows=(\d*)');
+ return query select tmp[1]::int;
+ end loop;
+end;
+$$;
+
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+
+explain (costs off)
+select *, 0::int from tenk1 a
+union all
+select *, 1::bigint from tenk1 b;
+
+select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;');
+
+reset parallel_setup_cost;
+reset parallel_tuple_cost;
+reset min_parallel_table_scan_size;
+drop function check_estimated_rows(text);
--
2.25.1
Limit (cost=33975298.60..33975298.85 rows=100 width=88)
-> Sort (cost=33975298.60..33975304.25 rows=2262 width=88)
Sort Key: ('store channel'::text), (('store'::text ||
(ssr.s_store_id)::text))
-> MixedAggregate (cost=17212822.43..33975212.15 rows=2262 width=88)
Hash Key: ('store channel'::text), (('store'::text ||
(ssr.s_store_id)::text))
Hash Key: ('store channel'::text)
Group Key: ()
-> Append (cost=17212822.43..33974571.14 rows=20613 width=88)
-> Subquery Scan on ssr (cost=17212822.43..17213803.25
rows=201 width=88)
-> GroupAggregate (cost=17212822.43..17213800.24
rows=201 width=49)
Group Key: store.s_store_id
-> Sort (cost=17212822.43..17212985.07
rows=65053 width=49)
Sort Key: store.s_store_id
-> Hash Join (cost=1965.58..17207621.67
rows=65053 width=49)
Hash Cond: ("*SELECT* 1".store_sk =
store.s_store_sk)
-> Hash Join
(cost=1943.48..17206705.09 rows=65053 width=36)
Hash Cond: ("*SELECT*
1".date_sk = date_dim.d_date_sk)
-> Append
(cost=0.00..12848065.13 rows=316803342 width=40)
-> Subquery Scan on
"*SELECT* 1" (cost=0.00..10312104.14 rows=288004992 width=40)
-> Gather
(cost=0.00..5992029.26 rows=288004992 width=52)
Workers
Planned: 54
->
Parallel Seq Scan on store_sales (cost=0.00..5992029.26 rows=5333426 width=52)
-> Subquery Scan on
"*SELECT* 2" (cost=0.00..951944.28 rows=28798350 width=40)
-> Gather
(cost=0.00..519969.03 rows=28798350 width=52)
Workers
Planned: 54
->
Parallel Seq Scan on store_returns (cost=0.00..519969.03 rows=533303 width=52)
-> Hash
(cost=1943.29..1943.29 rows=15 width=4)
-> Gather
(cost=0.00..1943.29 rows=15 width=4)
Workers Planned:
54
-> Parallel Seq
Scan on date_dim (cost=0.00..1943.29 rows=1 width=4)
Filter:
((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
-> Hash (cost=17.07..17.07
rows=402 width=21)
-> Gather (cost=0.00..17.07
rows=402 width=21)
Workers Planned: 54
-> Parallel Seq Scan
on store (cost=0.00..17.07 rows=7 width=21)
-> Subquery Scan on csr (cost=9776975.68..9777973.55
rows=20400 width=88)
-> GroupAggregate (cost=9776975.68..9777667.55
rows=20400 width=49)
Group Key: catalog_page.cp_catalog_page_id
-> Sort (cost=9776975.68..9777056.99
rows=32525 width=49)
Sort Key: catalog_page.cp_catalog_page_id
-> Hash Join (cost=2652.26..9774538.05
rows=32525 width=49)
Hash Cond: ("*SELECT* 1_1".page_sk
= catalog_page.cp_catalog_page_sk)
-> Hash Join
(cost=1943.48..9773382.05 rows=32525 width=36)
Hash Cond: ("*SELECT*
1_1".date_sk = date_dim_1.d_date_sk)
-> Append
(cost=0.00..7593154.66 rows=158396994 width=40)
-> Subquery Scan on
"*SELECT* 1_1" (cost=0.00..6266326.91 rows=144001728 width=40)
-> Gather
(cost=0.00..4106300.99 rows=144001728 width=52)
Workers
Planned: 54
->
Parallel Seq Scan on catalog_sales (cost=0.00..4106300.99 rows=2666699
width=52)
-> Subquery Scan on
"*SELECT* 2_1" (cost=0.00..534842.78 rows=14395266 width=40)
-> Gather
(cost=0.00..318913.79 rows=14395266 width=52)
Workers
Planned: 54
->
Parallel Seq Scan on catalog_returns (cost=0.00..318913.79 rows=266579
width=52)
-> Hash
(cost=1943.29..1943.29 rows=15 width=4)
-> Gather
(cost=0.00..1943.29 rows=15 width=4)
Workers Planned:
54
-> Parallel Seq
Scan on date_dim date_dim_1 (cost=0.00..1943.29 rows=1 width=4)
Filter:
((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
-> Hash (cost=453.78..453.78
rows=20400 width=21)
-> Gather
(cost=0.00..453.78 rows=20400 width=21)
Workers Planned: 54
-> Parallel Seq Scan
on catalog_page (cost=0.00..453.78 rows=378 width=21)
-> Subquery Scan on wsr (cost=6982661.69..6982691.27
rows=12 width=88)
-> GroupAggregate (cost=6982661.69..6982691.09
rows=12 width=49)
Group Key: web_site.web_site_id
-> Sort (cost=6982661.69..6982666.57
rows=1952 width=49)
Sort Key: web_site.web_site_id
-> Hash Join (cost=1944.78..6982555.00
rows=1952 width=49)
Hash Cond: ("*SELECT*
1_2".wsr_web_site_sk = web_site.web_site_sk)
-> Hash Join
(cost=1943.48..6982473.18 rows=16265 width=36)
Hash Cond: ("*SELECT*
1_2".date_sk = date_dim_2.d_date_sk)
-> Append
(cost=0.00..5891230.26 rows=79209949 width=40)
-> Subquery Scan on
"*SELECT* 1_2" (cost=0.00..3149874.55 rows=72009168 width=40)
-> Gather
(cost=0.00..2069737.03 rows=72009168 width=52)
Workers
Planned: 54
->
Parallel Seq Scan on web_sales (cost=0.00..2069737.03 rows=1333503 width=52)
-> Subquery Scan on
"*SELECT* 2_2" (cost=2089739.58..2345305.96 rows=7200781 width=40)
-> Gather
(cost=2089739.58..2237294.24 rows=7200781 width=52)
Workers
Planned: 54
->
Parallel Hash Left Join (cost=2089739.58..2237294.24 rows=133348 width=52)
Hash
Cond: ((web_returns.wr_item_sk = web_sales_1.ws_item_sk) AND
(web_returns.wr_order_number = web_sales_1.ws_order_number))
->
Parallel Seq Scan on web_returns (cost=0.00..142887.48 rows=133348 width=28)
->
Parallel Hash (cost=2069737.03..2069737.03 rows=1333503 width=12)
-> Parallel Seq Scan on web_sales web_sales_1 (cost=0.00..2069737.03
rows=1333503 width=12)
-> Hash
(cost=1943.29..1943.29 rows=15 width=4)
-> Gather
(cost=0.00..1943.29 rows=15 width=4)
Workers Planned:
54
-> Parallel Seq
Scan on date_dim date_dim_2 (cost=0.00..1943.29 rows=1 width=4)
Filter:
((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
-> Hash (cost=1.00..1.00 rows=24
width=21)
-> Gather (cost=0.00..1.00
rows=24 width=21)
Workers Planned: 54
-> Parallel Seq Scan
on web_site (cost=0.00..1.00 rows=1 width=21)
Limit (cost=15600476.15..15600476.40 rows=100 width=88)
-> Sort (cost=15600476.15..15600481.81 rows=2262 width=88)
Sort Key: ('store channel'::text), (('store'::text ||
(ssr.s_store_id)::text))
-> MixedAggregate (cost=6712029.63..15600389.70 rows=2262 width=88)
Hash Key: ('store channel'::text), (('store'::text ||
(ssr.s_store_id)::text))
Hash Key: ('store channel'::text)
Group Key: ()
-> Append (cost=6712029.63..15599748.69 rows=20613 width=88)
-> Subquery Scan on ssr (cost=6712029.63..6712531.30
rows=201 width=88)
-> Finalize GroupAggregate
(cost=6712029.63..6712528.29 rows=201 width=49)
Group Key: store.s_store_id
-> Gather Merge (cost=6712029.63..6712390.60
rows=10854 width=49)
Workers Planned: 54
-> Partial GroupAggregate
(cost=6712028.04..6712048.12 rows=201 width=49)
Group Key: store.s_store_id
-> Sort
(cost=6712028.04..6712031.05 rows=1205 width=49)
Sort Key: store.s_store_id
-> Parallel Hash Join
(cost=1960.47..6711966.37 rows=1205 width=49)
Hash Cond: ("*SELECT*
1".store_sk = store.s_store_sk)
-> Parallel Hash Join
(cost=1943.30..6711944.48 rows=1205 width=36)
Hash Cond:
("*SELECT* 1".date_sk = date_dim.d_date_sk)
-> Parallel
Append (cost=0.00..6629332.86 rows=5866729 width=40)
->
Subquery Scan on "*SELECT* 1" (cost=0.00..6072030.64 rows=5333426 width=40)
->
Parallel Seq Scan on store_sales (cost=0.00..5992029.26 rows=5333426 width=52)
->
Subquery Scan on "*SELECT* 2" (cost=0.00..527968.57 rows=533303 width=40)
->
Parallel Seq Scan on store_returns (cost=0.00..519969.03 rows=533303 width=52)
-> Parallel Hash
(cost=1943.29..1943.29 rows=1 width=4)
->
Parallel Seq Scan on date_dim (cost=0.00..1943.29 rows=1 width=4)
Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
-> Parallel Hash
(cost=17.07..17.07 rows=7 width=21)
-> Parallel Seq
Scan on store (cost=0.00..17.07 rows=7 width=21)
-> Subquery Scan on csr (cost=4526646.86..4528599.23
rows=20400 width=88)
-> Finalize GroupAggregate
(cost=4526646.86..4528293.23 rows=20400 width=49)
Group Key: catalog_page.cp_catalog_page_id
-> Gather Merge (cost=4526646.86..4527682.88
rows=32508 width=49)
Workers Planned: 54
-> Partial GroupAggregate
(cost=4526645.27..4526660.32 rows=602 width=49)
Group Key:
catalog_page.cp_catalog_page_id
-> Sort
(cost=4526645.27..4526646.77 rows=602 width=49)
Sort Key:
catalog_page.cp_catalog_page_id
-> Parallel Hash Join
(cost=2401.81..4526617.48 rows=602 width=49)
Hash Cond: ("*SELECT*
1_1".page_sk = catalog_page.cp_catalog_page_sk)
-> Parallel Hash Join
(cost=1943.30..4526156.60 rows=602 width=36)
Hash Cond:
("*SELECT* 1_1".date_sk = date_dim_1.d_date_sk)
-> Parallel
Append (cost=0.00..4483880.33 rows=2933278 width=40)
->
Subquery Scan on "*SELECT* 1_1" (cost=0.00..4146301.47 rows=2666699 width=40)
->
Parallel Seq Scan on catalog_sales (cost=0.00..4106300.99 rows=2666699
width=52)
->
Subquery Scan on "*SELECT* 2_1" (cost=0.00..322912.47 rows=266579 width=40)
->
Parallel Seq Scan on catalog_returns (cost=0.00..318913.79 rows=266579
width=52)
-> Parallel Hash
(cost=1943.29..1943.29 rows=1 width=4)
->
Parallel Seq Scan on date_dim date_dim_1 (cost=0.00..1943.29 rows=1 width=4)
Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
-> Parallel Hash
(cost=453.78..453.78 rows=378 width=21)
-> Parallel Seq
Scan on catalog_page (cost=0.00..453.78 rows=378 width=21)
-> Subquery Scan on wsr (cost=4358485.68..4358515.10
rows=12 width=88)
-> Finalize GroupAggregate
(cost=4358485.68..4358514.92 rows=12 width=49)
Group Key: web_site.web_site_id
-> Gather Merge (cost=4358485.68..4358506.70
rows=648 width=49)
Workers Planned: 54
-> Partial GroupAggregate
(cost=4358484.09..4358484.75 rows=12 width=49)
Group Key: web_site.web_site_id
-> Sort
(cost=4358484.09..4358484.18 rows=36 width=49)
Sort Key: web_site.web_site_id
-> Parallel Hash Join
(cost=1944.32..4358483.16 rows=36 width=49)
Hash Cond: ("*SELECT*
2_2".wsr_web_site_sk = web_site.web_site_sk)
-> Parallel Hash Join
(cost=1943.30..4358481.00 rows=301 width=36)
Hash Cond:
("*SELECT* 2_2".date_sk = date_dim_2.d_date_sk)
-> Parallel
Append (cost=0.00..4336368.29 rows=1466851 width=40)
->
Subquery Scan on "*SELECT* 2_2" (cost=2089739.58..2239294.46 rows=133348
width=40)
->
Parallel Hash Left Join (cost=2089739.58..2237294.24 rows=133348 width=52)
Hash Cond: ((web_returns.wr_item_sk = web_sales.ws_item_sk) AND
(web_returns.wr_order_number = web_sales.ws_order_number))
-> Parallel Seq Scan on web_returns (cost=0.00..142887.48 rows=133348
width=28)
-> Parallel Hash (cost=2069737.03..2069737.03 rows=1333503 width=12)
-> Parallel Seq Scan on web_sales (cost=0.00..2069737.03 rows=1333503
width=12)
->
Subquery Scan on "*SELECT* 1_2" (cost=0.00..2089739.58 rows=1333503 width=40)
->
Parallel Seq Scan on web_sales web_sales_1 (cost=0.00..2069737.03 rows=1333503
width=52)
-> Parallel Hash
(cost=1943.29..1943.29 rows=1 width=4)
->
Parallel Seq Scan on date_dim date_dim_2 (cost=0.00..1943.29 rows=1 width=4)
Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
-> Parallel Hash
(cost=1.00..1.00 rows=1 width=21)
-> Parallel Seq
Scan on web_site (cost=0.00..1.00 rows=1 width=21)