On Tue, Oct 10, 2017 at 1:31 PM, David Rowley <david.row...@2ndquadrant.com>
wrote:

> On 10 October 2017 at 17:57, Ashutosh Bapat
> <ashutosh.ba...@enterprisedb.com> wrote:
> > Append node just returns the result of ExecProcNode(). Charging
> > cpu_tuple_cost may make it too expensive. In other places where we
> > charge cpu_tuple_cost there's some processing done to the tuple like
> > ExecStoreTuple() in SeqNext(). May be we need some other measure for
> > Append's processing of the tuple.
>
> I don't think there's any need to invent any new GUC. You could just
> divide cpu_tuple_cost by something.
>
> I did a quick benchmark on my laptop to see how much Append really
> costs, and with the standard costs the actual cost seems to be about
> cpu_tuple_cost / 2.4. So probably cpu_tuple_cost / 2 might be
> realistic. create_set_projection_path() does something similar and
> brincostestimate() does some similar magic and applies 0.1 *
> cpu_operator_cost to the total cost.
>
> # create table p (a int, b int);
> # create table p1 () inherits (p);
> # insert into p1 select generate_series(1,1000000);
> # vacuum analyze p1;
> # \q
> $ echo "select count(*) from p1;" > p1.sql
> $ echo "select count(*) from p;" > p.sql
> $ pgbench -T 60 -f p1.sql -n
>
> latency average = 58.567 ms
>
> $ pgbench -T 60 -f p.sql -n
> latency average = 72.984 ms
>
> $ psql
> psql (11devel)
> Type "help" for help.
>
> # -- check the cost of the plan.
> # explain select count(*) from p1;
>                             QUERY PLAN
> ------------------------------------------------------------------
>  Aggregate  (cost=16925.00..16925.01 rows=1 width=8)
>    ->  Seq Scan on p1  (cost=0.00..14425.00 rows=1000000 width=0)
> (2 rows)
>
> # -- selecting from the parent is the same due to zero Append cost.
> # explain select count(*) from p;
>                                QUERY PLAN
> ------------------------------------------------------------------------
>  Aggregate  (cost=16925.00..16925.01 rows=1 width=8)
>    ->  Append  (cost=0.00..14425.00 rows=1000001 width=0)
>          ->  Seq Scan on p  (cost=0.00..0.00 rows=1 width=0)
>          ->  Seq Scan on p1  (cost=0.00..14425.00 rows=1000000 width=0)
> (4 rows)
>
> # -- extrapolate the additional time taken for the Append scan and
> work out what the planner
> # -- should add to the plan's cost, then divide by the number of rows
> in p1 to work out the
> # -- tuple cost of pulling a row through the append.
> # select (16925.01 * (72.984 / 58.567) - 16925.01)  / 1000000;
>         ?column?
> ------------------------
>  0.00416630302337493743
> (1 row)
>
> # show cpu_tuple_cost;
>  cpu_tuple_cost
> ----------------
>  0.01
> (1 row)
>
> # -- How does that compare to the cpu_tuple_cost?
> # select current_Setting('cpu_tuple_cost')::float8 /
> 0.00416630302337493743;
>     ?column?
> ----------------
>  2.400209476818
> (1 row)
>
> Maybe it's worth trying with different row counts to see if the
> additional cost is consistent, but it's probably not worth being too
> critical here.
>

I have tried exactly the same tests to arrive at this factor on my local development
machine. With parallelism enabled I got this number as 7.9. However, if
I disable parallelism (and I believe David disabled it too), I get
this number as 1.8, whereas for 10000 rows I get this number as 1.7.

-- With Gather
# select current_Setting('cpu_tuple_cost')::float8 / ((10633.56 * (81.035 /
72.450) - 10633.56)  / 1000000);
7.9

-- Without Gather
# select current_Setting('cpu_tuple_cost')::float8 / ((16925.01 * (172.838
/ 131.400) - 16925.01)  / 1000000);
1.8

-- With 10000 rows (so no Gather too)
# select current_Setting('cpu_tuple_cost')::float8 / ((170.01 * (1.919 /
1.424) - 170.01)  / 10000);
1.7

So it is not so straightforward to come up with the correct heuristic here.
Thus, using 50% of cpu_tuple_cost looks good to me.

As suggested by Ashutosh and Robert, I have attached a separate small WIP patch
for it.

I think it would be better to move this topic to a separate mail thread.
Do you agree?


>
> --
>  David Rowley                   http://www.2ndQuadrant.com/
>  PostgreSQL Development, 24x7 Support, Training & Services
>



-- 
Jeevan Chalke
Technical Architect, Product Development
EnterpriseDB Corporation
The Enterprise PostgreSQL Company
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index ce32b8a4..c0bf602 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1742,6 +1742,33 @@ cost_sort(Path *path, PlannerInfo *root,
 }
 
 /*
+ * cost_append
+ *	  Determines and returns the cost of an Append node.
+ *
+ * Though Append doesn't do any selection or projection, it's not free.  So we
+ * try to add cost per input tuple which is arbitrarily calculated as
+ * DEFAULT_APPEND_COST_FACTOR * cpu_tuple_cost.
+ *
+ * 'input_startup_cost' is the sum of the input streams' startup costs
+ * 'input_total_cost' is the sum of the input streams' total costs
+ * 'tuples' is the number of tuples in all the streams
+ */
+void
+cost_append(Path *path,
+			Cost input_startup_cost, Cost input_total_cost,
+			double tuples)
+{
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+
+	/* Add Append node overhead. */
+	run_cost += cpu_tuple_cost * DEFAULT_APPEND_COST_FACTOR * tuples;
+
+	path->startup_cost = startup_cost + input_startup_cost;
+	path->total_cost = startup_cost + run_cost + input_total_cost;
+}
+
+/*
  * cost_merge_append
  *	  Determines and returns the cost of a MergeAppend node.
  *
@@ -1800,6 +1827,9 @@ cost_merge_append(Path *path, PlannerInfo *root,
 	 */
 	run_cost += cpu_operator_cost * tuples;
 
+	/* Add MergeAppend node overhead like we do it for the Append node */
+	run_cost += cpu_tuple_cost * DEFAULT_APPEND_COST_FACTOR * tuples;
+
 	path->startup_cost = startup_cost + input_startup_cost;
 	path->total_cost = startup_cost + run_cost + input_total_cost;
 }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 2d491eb..c8fdf1c 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1212,6 +1212,8 @@ create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer,
 				   int parallel_workers, List *partitioned_rels)
 {
 	AppendPath *pathnode = makeNode(AppendPath);
+	Cost		input_startup_cost;
+	Cost		input_total_cost;
 	ListCell   *l;
 
 	pathnode->path.pathtype = T_Append;
@@ -1227,32 +1229,31 @@ create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer,
 	pathnode->subpaths = subpaths;
 
 	/*
-	 * We don't bother with inventing a cost_append(), but just do it here.
-	 *
-	 * Compute rows and costs as sums of subplan rows and costs.  We charge
-	 * nothing extra for the Append itself, which perhaps is too optimistic,
-	 * but since it doesn't do any selection or projection, it is a pretty
-	 * cheap node.
+	 * Add up the sizes and costs of the input paths.
 	 */
 	pathnode->path.rows = 0;
-	pathnode->path.startup_cost = 0;
-	pathnode->path.total_cost = 0;
+	input_startup_cost = 0;
+	input_total_cost = 0;
 	foreach(l, subpaths)
 	{
 		Path	   *subpath = (Path *) lfirst(l);
 
 		pathnode->path.rows += subpath->rows;
-
-		if (l == list_head(subpaths))	/* first node? */
-			pathnode->path.startup_cost = subpath->startup_cost;
-		pathnode->path.total_cost += subpath->total_cost;
 		pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
 			subpath->parallel_safe;
 
+		if (l == list_head(subpaths))	/* first node? */
+			input_startup_cost = subpath->startup_cost;
+		input_total_cost += subpath->total_cost;
+
 		/* All child paths must have same parameterization */
 		Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
 	}
 
+	/* Now we can compute total costs of the Append */
+	cost_append(&pathnode->path, input_startup_cost, input_total_cost,
+				pathnode->path.rows);
+
 	return pathnode;
 }
 
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 306d923..cd42e5a 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -31,6 +31,12 @@
 
 #define DEFAULT_EFFECTIVE_CACHE_SIZE  524288	/* measured in pages */
 
+/*
+ * Arbitrarily use 50% of the cpu_tuple_cost to cost append node. Note that
+ * this value should be multiplied with cpu_tuple_cost wherever applicable.
+ */
+#define DEFAULT_APPEND_COST_FACTOR 0.5
+
 typedef enum
 {
 	CONSTRAINT_EXCLUSION_OFF,	/* do not use c_e */
@@ -106,6 +112,9 @@ extern void cost_sort(Path *path, PlannerInfo *root,
 		  List *pathkeys, Cost input_cost, double tuples, int width,
 		  Cost comparison_cost, int sort_mem,
 		  double limit_tuples);
+extern void cost_append(Path *path,
+			Cost input_startup_cost, Cost input_total_cost,
+			double tuples);
 extern void cost_merge_append(Path *path, PlannerInfo *root,
 				  List *pathkeys, int n_streams,
 				  Cost input_startup_cost, Cost input_total_cost,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to