(2018/11/28 13:38), Etsuro Fujita wrote:
> BTW another thing I noticed is this comment on costing aggregate
> pushdown paths using local statistics in estimate_path_cost_size:
> 
>               * Also, core does not care about costing HAVING expressions and
>               * adding that to the costs.  So similarly, here too we are not
>               * considering remote and local conditions for costing.
> 
> I think this was true when aggregate pushdown went in, but isn't anymore
> because of commit 7b6c07547190f056b0464098bb5a2247129d7aa2.  So we
> should update estimate_path_cost_size so that it accounts for the
> selectivity and cost of the HAVING expressions as well?

There seems to be no objections, I updated the patch as such.  Attached
is an updated version of the patch.

Best regards,
Etsuro Fujita
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index e653c30..dfa6201 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3209,6 +3209,8 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
          Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
 (6 rows)
 
+-- Update local stats on ft2
+ANALYZE ft2;
 -- Add into extension
 alter extension postgres_fdw add operator class my_op_class using btree;
 alter extension postgres_fdw add function my_op_cmp(a int, b int);
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index d22c974..d9b26b5 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -2844,10 +2844,6 @@ estimate_path_cost_size(PlannerInfo *root,
 			 * strategy will be considered at remote side, thus for
 			 * simplicity, we put all startup related costs in startup_cost
 			 * and all finalization and run cost are added in total_cost.
-			 *
-			 * Also, core does not care about costing HAVING expressions and
-			 * adding that to the costs.  So similarly, here too we are not
-			 * considering remote and local conditions for costing.
 			 */
 
 			ofpinfo = (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private;
@@ -2880,10 +2876,25 @@ estimate_path_cost_size(PlannerInfo *root,
 											input_rows, NULL);
 
 			/*
-			 * Number of rows expected from foreign server will be same as
-			 * that of number of groups.
+			 * Get the retrieved-rows and rows estimates.  If there are HAVING
+			 * quals, account for the selectivities of the remotely-checked
+			 * and locally-checked quals.
 			 */
-			rows = retrieved_rows = numGroups;
+			if (root->parse->havingQual)
+			{
+				retrieved_rows =
+					clamp_row_est(numGroups *
+								  clauselist_selectivity(root,
+														 fpinfo->remote_conds,
+														 0,
+														 JOIN_INNER,
+														 NULL));
+				rows = clamp_row_est(retrieved_rows * fpinfo->local_conds_sel);
+			}
+			else
+			{
+				rows = retrieved_rows = numGroups;
+			}
 
 			/*-----
 			 * Startup cost includes:
@@ -2909,6 +2920,20 @@ estimate_path_cost_size(PlannerInfo *root,
 			run_cost += aggcosts.finalCost * numGroups;
 			run_cost += cpu_tuple_cost * numGroups;
 			run_cost += ptarget->cost.per_tuple * numGroups;
+
+			/* Accout for the eval cost of HAVING quals, if any */
+			if (root->parse->havingQual)
+			{
+				QualCost	remote_cost;
+
+				/* Add in the eval cost of the remotely-checked quals */
+				cost_qual_eval(&remote_cost, fpinfo->remote_conds, root);
+				startup_cost += remote_cost.startup;
+				run_cost += remote_cost.per_tuple * numGroups;
+				/* Add in the eval cost of the locally-checked quals */
+				startup_cost += fpinfo->local_conds_cost.startup;
+				run_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows;
+			}
 		}
 		else
 		{
@@ -5496,6 +5521,19 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	if (!foreign_grouping_ok(root, grouped_rel, extra->havingQual))
 		return;
 
+	/*
+	 * Compute the selectivity and cost of the local_conds, so we don't have
+	 * to do it over again for each path.  The best we can do for these
+	 * conditions is to estimate selectivity on the basis of local statistics.
+	 */
+	fpinfo->local_conds_sel = clauselist_selectivity(root,
+													 fpinfo->local_conds,
+													 0,
+													 JOIN_INNER,
+													 NULL);
+
+	cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
+
 	/* Estimate the cost of push down */
 	estimate_path_cost_size(root, grouped_rel, NIL, NIL, &rows,
 							&width, &startup_cost, &total_cost);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 6aa9a7f..f963e99 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -807,6 +807,9 @@ create operator class my_op_class for type int using btree family my_op_family a
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
 
+-- Update local stats on ft2
+ANALYZE ft2;
+
 -- Add into extension
 alter extension postgres_fdw add operator class my_op_class using btree;
 alter extension postgres_fdw add function my_op_cmp(a int, b int);

Reply via email to