allow partial union-all and improve parallel subquery costing

Luc Vlaming Tue, 13 Oct 2020 01:58:21 -0700

Hi,

While developing some improvements for TPC-DS queries I found out that with
UNION ALL partial paths are not emitted. Whilst fixing that I also came across
the subquery costing which does not seem to consider parallelism when doing
the costing.


I added a simplified testcase in pg-regress to show this goes wrong, and
attached also a before and after explain output of tpc-ds SF100 query 5
based on version 12.4.

I hope I followed all etiquette and these kind of improvements are welcome.

Kind regards,
Luc
Swarm64

From 651999e07735f7dca887b4b672a008620515e857 Mon Sep 17 00:00:00 2001
From: Luc Vlaming <l...@swarm64.com>
Date: Tue, 13 Oct 2020 09:35:33 +0200
Subject: [PATCH v1] Allow partial UNION ALL; improve parallel subquery costing

---
 src/backend/optimizer/path/costsize.c  | 11 ++++++
 src/backend/optimizer/prep/prepunion.c |  4 +++
 src/test/regress/expected/union.out    | 50 ++++++++++++++++++++++++++
 src/test/regress/sql/union.sql         | 35 ++++++++++++++++++
 4 files changed, 100 insertions(+)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a2a9b1f7be..b05efb8bd5 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1316,6 +1316,17 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
 	startup_cost += path->path.pathtarget->cost.startup;
 	run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
 
+	/* Adjust costing for parallelism, if used. */
+	if (path->path.parallel_workers > 0)
+	{
+		double		parallel_divisor = get_parallel_divisor(&path->path);
+
+		path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
+
+		/* The CPU cost is divided among all the workers. */
+		run_cost /= parallel_divisor;
+	}
+
 	path->path.startup_cost += startup_cost;
 	path->path.total_cost += startup_cost + run_cost;
 }
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index cd9d49c1f7..99da4297e6 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -675,6 +675,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 							   NIL, NULL,
 							   parallel_workers, enable_parallel_append,
 							   NIL, -1);
+
+		if (op->all && enable_parallel_append)
+			add_partial_path(result_rel, ppath);
+
 		ppath = (Path *)
 			create_gather_path(root, result_rel, ppath,
 							   result_rel->reltarget, NULL, NULL);
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 7189f5bd3d..77a7e5e759 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1052,3 +1052,53 @@ where (x = 0) or (q1 >= q2 and q1 <= q2);
  4567890123456789 |  4567890123456789 | 1
 (6 rows)
 
+-- Test handling of appendrel with different types which disables the path flattening and
+-- forces a subquery node. for the subquery node ensure the rowcounts are correct.
+create function check_estimated_rows(text) returns table (estimated int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain %s', $1)
+    loop
+        tmp := regexp_match(ln, 'rows=(\d*)');
+        return query select tmp[1]::int;
+    end loop;
+end;
+$$;
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+explain (costs off)
+select *, 0::int from tenk1 a
+union all
+select *, 1::bigint from tenk1 b;
+                   QUERY PLAN                   
+------------------------------------------------
+ Gather
+   Workers Planned: 2
+   ->  Parallel Append
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Parallel Seq Scan on tenk1 a
+         ->  Parallel Seq Scan on tenk1 b
+(6 rows)
+
+select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;');
+ check_estimated_rows 
+----------------------
+                19990
+                     
+                 8330
+                 4165
+                 4165
+                 4165
+(6 rows)
+
+reset parallel_setup_cost;
+reset parallel_tuple_cost;
+reset min_parallel_table_scan_size;
+drop function check_estimated_rows(text);
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 5f4881d594..93a2c7b329 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -440,3 +440,38 @@ select * from
    union all
    select *, 1 as x from int8_tbl b) ss
 where (x = 0) or (q1 >= q2 and q1 <= q2);
+
+-- Test handling of appendrel with different types which disables the path flattening and
+-- forces a subquery node. for the subquery node ensure the rowcounts are correct.
+create function check_estimated_rows(text) returns table (estimated int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain %s', $1)
+    loop
+        tmp := regexp_match(ln, 'rows=(\d*)');
+        return query select tmp[1]::int;
+    end loop;
+end;
+$$;
+
+set min_parallel_table_scan_size = 0;
+set parallel_setup_cost = 0;
+set parallel_tuple_cost = 0;
+
+explain (costs off)
+select *, 0::int from tenk1 a
+union all
+select *, 1::bigint from tenk1 b;
+
+select check_estimated_rows('select *, 0::int from tenk1 a union all select *, 1::bigint from tenk1 b;');
+
+reset parallel_setup_cost;
+reset parallel_tuple_cost;
+reset min_parallel_table_scan_size;
+drop function check_estimated_rows(text);
-- 
2.25.1

Limit  (cost=33975298.60..33975298.85 rows=100 width=88)
  ->  Sort  (cost=33975298.60..33975304.25 rows=2262 width=88)
        Sort Key: ('store channel'::text), (('store'::text || 
(ssr.s_store_id)::text))
        ->  MixedAggregate  (cost=17212822.43..33975212.15 rows=2262 width=88)
              Hash Key: ('store channel'::text), (('store'::text || 
(ssr.s_store_id)::text))
              Hash Key: ('store channel'::text)
              Group Key: ()
              ->  Append  (cost=17212822.43..33974571.14 rows=20613 width=88)
                    ->  Subquery Scan on ssr  (cost=17212822.43..17213803.25 
rows=201 width=88)
                          ->  GroupAggregate  (cost=17212822.43..17213800.24 
rows=201 width=49)
                                Group Key: store.s_store_id
                                ->  Sort  (cost=17212822.43..17212985.07 
rows=65053 width=49)
                                      Sort Key: store.s_store_id
                                      ->  Hash Join  (cost=1965.58..17207621.67 
rows=65053 width=49)
                                            Hash Cond: ("*SELECT* 1".store_sk = 
store.s_store_sk)
                                            ->  Hash Join  
(cost=1943.48..17206705.09 rows=65053 width=36)
                                                  Hash Cond: ("*SELECT* 
1".date_sk = date_dim.d_date_sk)
                                                  ->  Append  
(cost=0.00..12848065.13 rows=316803342 width=40)
                                                        ->  Subquery Scan on 
"*SELECT* 1"  (cost=0.00..10312104.14 rows=288004992 width=40)
                                                              ->  Gather  
(cost=0.00..5992029.26 rows=288004992 width=52)
                                                                    Workers 
Planned: 54
                                                                    ->  
Parallel Seq Scan on store_sales  (cost=0.00..5992029.26 rows=5333426 width=52)
                                                        ->  Subquery Scan on 
"*SELECT* 2"  (cost=0.00..951944.28 rows=28798350 width=40)
                                                              ->  Gather  
(cost=0.00..519969.03 rows=28798350 width=52)
                                                                    Workers 
Planned: 54
                                                                    ->  
Parallel Seq Scan on store_returns  (cost=0.00..519969.03 rows=533303 width=52)
                                                  ->  Hash  
(cost=1943.29..1943.29 rows=15 width=4)
                                                        ->  Gather  
(cost=0.00..1943.29 rows=15 width=4)
                                                              Workers Planned: 
54
                                                              ->  Parallel Seq 
Scan on date_dim  (cost=0.00..1943.29 rows=1 width=4)
                                                                    Filter: 
((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
                                            ->  Hash  (cost=17.07..17.07 
rows=402 width=21)
                                                  ->  Gather  (cost=0.00..17.07 
rows=402 width=21)
                                                        Workers Planned: 54
                                                        ->  Parallel Seq Scan 
on store  (cost=0.00..17.07 rows=7 width=21)
                    ->  Subquery Scan on csr  (cost=9776975.68..9777973.55 
rows=20400 width=88)
                          ->  GroupAggregate  (cost=9776975.68..9777667.55 
rows=20400 width=49)
                                Group Key: catalog_page.cp_catalog_page_id
                                ->  Sort  (cost=9776975.68..9777056.99 
rows=32525 width=49)
                                      Sort Key: catalog_page.cp_catalog_page_id
                                      ->  Hash Join  (cost=2652.26..9774538.05 
rows=32525 width=49)
                                            Hash Cond: ("*SELECT* 1_1".page_sk 
= catalog_page.cp_catalog_page_sk)
                                            ->  Hash Join  
(cost=1943.48..9773382.05 rows=32525 width=36)
                                                  Hash Cond: ("*SELECT* 
1_1".date_sk = date_dim_1.d_date_sk)
                                                  ->  Append  
(cost=0.00..7593154.66 rows=158396994 width=40)
                                                        ->  Subquery Scan on 
"*SELECT* 1_1"  (cost=0.00..6266326.91 rows=144001728 width=40)
                                                              ->  Gather  
(cost=0.00..4106300.99 rows=144001728 width=52)
                                                                    Workers 
Planned: 54
                                                                    ->  
Parallel Seq Scan on catalog_sales  (cost=0.00..4106300.99 rows=2666699 
width=52)
                                                        ->  Subquery Scan on 
"*SELECT* 2_1"  (cost=0.00..534842.78 rows=14395266 width=40)
                                                              ->  Gather  
(cost=0.00..318913.79 rows=14395266 width=52)
                                                                    Workers 
Planned: 54
                                                                    ->  
Parallel Seq Scan on catalog_returns  (cost=0.00..318913.79 rows=266579 
width=52)
                                                  ->  Hash  
(cost=1943.29..1943.29 rows=15 width=4)
                                                        ->  Gather  
(cost=0.00..1943.29 rows=15 width=4)
                                                              Workers Planned: 
54
                                                              ->  Parallel Seq 
Scan on date_dim date_dim_1  (cost=0.00..1943.29 rows=1 width=4)
                                                                    Filter: 
((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
                                            ->  Hash  (cost=453.78..453.78 
rows=20400 width=21)
                                                  ->  Gather  
(cost=0.00..453.78 rows=20400 width=21)
                                                        Workers Planned: 54
                                                        ->  Parallel Seq Scan 
on catalog_page  (cost=0.00..453.78 rows=378 width=21)
                    ->  Subquery Scan on wsr  (cost=6982661.69..6982691.27 
rows=12 width=88)
                          ->  GroupAggregate  (cost=6982661.69..6982691.09 
rows=12 width=49)
                                Group Key: web_site.web_site_id
                                ->  Sort  (cost=6982661.69..6982666.57 
rows=1952 width=49)
                                      Sort Key: web_site.web_site_id
                                      ->  Hash Join  (cost=1944.78..6982555.00 
rows=1952 width=49)
                                            Hash Cond: ("*SELECT* 
1_2".wsr_web_site_sk = web_site.web_site_sk)
                                            ->  Hash Join  
(cost=1943.48..6982473.18 rows=16265 width=36)
                                                  Hash Cond: ("*SELECT* 
1_2".date_sk = date_dim_2.d_date_sk)
                                                  ->  Append  
(cost=0.00..5891230.26 rows=79209949 width=40)
                                                        ->  Subquery Scan on 
"*SELECT* 1_2"  (cost=0.00..3149874.55 rows=72009168 width=40)
                                                              ->  Gather  
(cost=0.00..2069737.03 rows=72009168 width=52)
                                                                    Workers 
Planned: 54
                                                                    ->  
Parallel Seq Scan on web_sales  (cost=0.00..2069737.03 rows=1333503 width=52)
                                                        ->  Subquery Scan on 
"*SELECT* 2_2"  (cost=2089739.58..2345305.96 rows=7200781 width=40)
                                                              ->  Gather  
(cost=2089739.58..2237294.24 rows=7200781 width=52)
                                                                    Workers 
Planned: 54
                                                                    ->  
Parallel Hash Left Join  (cost=2089739.58..2237294.24 rows=133348 width=52)
                                                                          Hash 
Cond: ((web_returns.wr_item_sk = web_sales_1.ws_item_sk) AND 
(web_returns.wr_order_number = web_sales_1.ws_order_number))
                                                                          ->  
Parallel Seq Scan on web_returns  (cost=0.00..142887.48 rows=133348 width=28)
                                                                          ->  
Parallel Hash  (cost=2069737.03..2069737.03 rows=1333503 width=12)
                                                                                
->  Parallel Seq Scan on web_sales web_sales_1  (cost=0.00..2069737.03 
rows=1333503 width=12)
                                                  ->  Hash  
(cost=1943.29..1943.29 rows=15 width=4)
                                                        ->  Gather  
(cost=0.00..1943.29 rows=15 width=4)
                                                              Workers Planned: 
54
                                                              ->  Parallel Seq 
Scan on date_dim date_dim_2  (cost=0.00..1943.29 rows=1 width=4)
                                                                    Filter: 
((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
                                            ->  Hash  (cost=1.00..1.00 rows=24 
width=21)
                                                  ->  Gather  (cost=0.00..1.00 
rows=24 width=21)
                                                        Workers Planned: 54
                                                        ->  Parallel Seq Scan 
on web_site  (cost=0.00..1.00 rows=1 width=21)

Limit  (cost=15600476.15..15600476.40 rows=100 width=88)
  ->  Sort  (cost=15600476.15..15600481.81 rows=2262 width=88)
        Sort Key: ('store channel'::text), (('store'::text || 
(ssr.s_store_id)::text))
        ->  MixedAggregate  (cost=6712029.63..15600389.70 rows=2262 width=88)
              Hash Key: ('store channel'::text), (('store'::text || 
(ssr.s_store_id)::text))
              Hash Key: ('store channel'::text)
              Group Key: ()
              ->  Append  (cost=6712029.63..15599748.69 rows=20613 width=88)
                    ->  Subquery Scan on ssr  (cost=6712029.63..6712531.30 
rows=201 width=88)
                          ->  Finalize GroupAggregate  
(cost=6712029.63..6712528.29 rows=201 width=49)
                                Group Key: store.s_store_id
                                ->  Gather Merge  (cost=6712029.63..6712390.60 
rows=10854 width=49)
                                      Workers Planned: 54
                                      ->  Partial GroupAggregate  
(cost=6712028.04..6712048.12 rows=201 width=49)
                                            Group Key: store.s_store_id
                                            ->  Sort  
(cost=6712028.04..6712031.05 rows=1205 width=49)
                                                  Sort Key: store.s_store_id
                                                  ->  Parallel Hash Join  
(cost=1960.47..6711966.37 rows=1205 width=49)
                                                        Hash Cond: ("*SELECT* 
1".store_sk = store.s_store_sk)
                                                        ->  Parallel Hash Join  
(cost=1943.30..6711944.48 rows=1205 width=36)
                                                              Hash Cond: 
("*SELECT* 1".date_sk = date_dim.d_date_sk)
                                                              ->  Parallel 
Append  (cost=0.00..6629332.86 rows=5866729 width=40)
                                                                    ->  
Subquery Scan on "*SELECT* 1"  (cost=0.00..6072030.64 rows=5333426 width=40)
                                                                          ->  
Parallel Seq Scan on store_sales  (cost=0.00..5992029.26 rows=5333426 width=52)
                                                                    ->  
Subquery Scan on "*SELECT* 2"  (cost=0.00..527968.57 rows=533303 width=40)
                                                                          ->  
Parallel Seq Scan on store_returns  (cost=0.00..519969.03 rows=533303 width=52)
                                                              ->  Parallel Hash 
 (cost=1943.29..1943.29 rows=1 width=4)
                                                                    ->  
Parallel Seq Scan on date_dim  (cost=0.00..1943.29 rows=1 width=4)
                                                                          
Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
                                                        ->  Parallel Hash  
(cost=17.07..17.07 rows=7 width=21)
                                                              ->  Parallel Seq 
Scan on store  (cost=0.00..17.07 rows=7 width=21)
                    ->  Subquery Scan on csr  (cost=4526646.86..4528599.23 
rows=20400 width=88)
                          ->  Finalize GroupAggregate  
(cost=4526646.86..4528293.23 rows=20400 width=49)
                                Group Key: catalog_page.cp_catalog_page_id
                                ->  Gather Merge  (cost=4526646.86..4527682.88 
rows=32508 width=49)
                                      Workers Planned: 54
                                      ->  Partial GroupAggregate  
(cost=4526645.27..4526660.32 rows=602 width=49)
                                            Group Key: 
catalog_page.cp_catalog_page_id
                                            ->  Sort  
(cost=4526645.27..4526646.77 rows=602 width=49)
                                                  Sort Key: 
catalog_page.cp_catalog_page_id
                                                  ->  Parallel Hash Join  
(cost=2401.81..4526617.48 rows=602 width=49)
                                                        Hash Cond: ("*SELECT* 
1_1".page_sk = catalog_page.cp_catalog_page_sk)
                                                        ->  Parallel Hash Join  
(cost=1943.30..4526156.60 rows=602 width=36)
                                                              Hash Cond: 
("*SELECT* 1_1".date_sk = date_dim_1.d_date_sk)
                                                              ->  Parallel 
Append  (cost=0.00..4483880.33 rows=2933278 width=40)
                                                                    ->  
Subquery Scan on "*SELECT* 1_1"  (cost=0.00..4146301.47 rows=2666699 width=40)
                                                                          ->  
Parallel Seq Scan on catalog_sales  (cost=0.00..4106300.99 rows=2666699 
width=52)
                                                                    ->  
Subquery Scan on "*SELECT* 2_1"  (cost=0.00..322912.47 rows=266579 width=40)
                                                                          ->  
Parallel Seq Scan on catalog_returns  (cost=0.00..318913.79 rows=266579 
width=52)
                                                              ->  Parallel Hash 
 (cost=1943.29..1943.29 rows=1 width=4)
                                                                    ->  
Parallel Seq Scan on date_dim date_dim_1  (cost=0.00..1943.29 rows=1 width=4)
                                                                          
Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
                                                        ->  Parallel Hash  
(cost=453.78..453.78 rows=378 width=21)
                                                              ->  Parallel Seq 
Scan on catalog_page  (cost=0.00..453.78 rows=378 width=21)
                    ->  Subquery Scan on wsr  (cost=4358485.68..4358515.10 
rows=12 width=88)
                          ->  Finalize GroupAggregate  
(cost=4358485.68..4358514.92 rows=12 width=49)
                                Group Key: web_site.web_site_id
                                ->  Gather Merge  (cost=4358485.68..4358506.70 
rows=648 width=49)
                                      Workers Planned: 54
                                      ->  Partial GroupAggregate  
(cost=4358484.09..4358484.75 rows=12 width=49)
                                            Group Key: web_site.web_site_id
                                            ->  Sort  
(cost=4358484.09..4358484.18 rows=36 width=49)
                                                  Sort Key: web_site.web_site_id
                                                  ->  Parallel Hash Join  
(cost=1944.32..4358483.16 rows=36 width=49)
                                                        Hash Cond: ("*SELECT* 
2_2".wsr_web_site_sk = web_site.web_site_sk)
                                                        ->  Parallel Hash Join  
(cost=1943.30..4358481.00 rows=301 width=36)
                                                              Hash Cond: 
("*SELECT* 2_2".date_sk = date_dim_2.d_date_sk)
                                                              ->  Parallel 
Append  (cost=0.00..4336368.29 rows=1466851 width=40)
                                                                    ->  
Subquery Scan on "*SELECT* 2_2"  (cost=2089739.58..2239294.46 rows=133348 
width=40)
                                                                          ->  
Parallel Hash Left Join  (cost=2089739.58..2237294.24 rows=133348 width=52)
                                                                                
Hash Cond: ((web_returns.wr_item_sk = web_sales.ws_item_sk) AND 
(web_returns.wr_order_number = web_sales.ws_order_number))
                                                                                
->  Parallel Seq Scan on web_returns  (cost=0.00..142887.48 rows=133348 
width=28)
                                                                                
->  Parallel Hash  (cost=2069737.03..2069737.03 rows=1333503 width=12)
                                                                                
      ->  Parallel Seq Scan on web_sales  (cost=0.00..2069737.03 rows=1333503 
width=12)
                                                                    ->  
Subquery Scan on "*SELECT* 1_2"  (cost=0.00..2089739.58 rows=1333503 width=40)
                                                                          ->  
Parallel Seq Scan on web_sales web_sales_1  (cost=0.00..2069737.03 rows=1333503 
width=52)
                                                              ->  Parallel Hash 
 (cost=1943.29..1943.29 rows=1 width=4)
                                                                    ->  
Parallel Seq Scan on date_dim date_dim_2  (cost=0.00..1943.29 rows=1 width=4)
                                                                          
Filter: ((d_date >= '2000-08-27'::date) AND (d_date <= '2000-09-10'::date))
                                                        ->  Parallel Hash  
(cost=1.00..1.00 rows=1 width=21)
                                                              ->  Parallel Seq 
Scan on web_site  (cost=0.00..1.00 rows=1 width=21)

allow partial union-all and improve parallel subquery costing

Reply via email to