(2018/11/28 13:38), Etsuro Fujita wrote:
> BTW another thing I noticed is this comment on costing aggregate
> pushdown paths using local statistics in estimate_path_cost_size:
>
> * Also, core does not care about costing HAVING expressions and
> * adding that to the costs. So similarly, here too we are not
> * considering remote and local conditions for costing.
>
> I think this was true when aggregate pushdown went in, but isn't anymore
> because of commit 7b6c07547190f056b0464098bb5a2247129d7aa2. So we
> should update estimate_path_cost_size so that it accounts for the
> selectivity and cost of the HAVING expressions as well?
There seems to be no objections, I updated the patch as such. Attached
is an updated version of the patch.
Best regards,
Etsuro Fujita
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index e653c30..dfa6201 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3209,6 +3209,8 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
(6 rows)
+-- Update local stats on ft2
+ANALYZE ft2;
-- Add into extension
alter extension postgres_fdw add operator class my_op_class using btree;
alter extension postgres_fdw add function my_op_cmp(a int, b int);
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index d22c974..d9b26b5 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -2844,10 +2844,6 @@ estimate_path_cost_size(PlannerInfo *root,
* strategy will be considered at remote side, thus for
* simplicity, we put all startup related costs in startup_cost
* and all finalization and run cost are added in total_cost.
- *
- * Also, core does not care about costing HAVING expressions and
- * adding that to the costs. So similarly, here too we are not
- * considering remote and local conditions for costing.
*/
ofpinfo = (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private;
@@ -2880,10 +2876,25 @@ estimate_path_cost_size(PlannerInfo *root,
input_rows, NULL);
/*
- * Number of rows expected from foreign server will be same as
- * that of number of groups.
+ * Get the retrieved-rows and rows estimates. If there are HAVING
+ * quals, account for the selectivities of the remotely-checked
+ * and locally-checked quals.
*/
- rows = retrieved_rows = numGroups;
+ if (root->parse->havingQual)
+ {
+ retrieved_rows =
+ clamp_row_est(numGroups *
+ clauselist_selectivity(root,
+ fpinfo->remote_conds,
+ 0,
+ JOIN_INNER,
+ NULL));
+ rows = clamp_row_est(retrieved_rows * fpinfo->local_conds_sel);
+ }
+ else
+ {
+ rows = retrieved_rows = numGroups;
+ }
/*-----
* Startup cost includes:
@@ -2909,6 +2920,20 @@ estimate_path_cost_size(PlannerInfo *root,
run_cost += aggcosts.finalCost * numGroups;
run_cost += cpu_tuple_cost * numGroups;
run_cost += ptarget->cost.per_tuple * numGroups;
+
+ /* Accout for the eval cost of HAVING quals, if any */
+ if (root->parse->havingQual)
+ {
+ QualCost remote_cost;
+
+ /* Add in the eval cost of the remotely-checked quals */
+ cost_qual_eval(&remote_cost, fpinfo->remote_conds, root);
+ startup_cost += remote_cost.startup;
+ run_cost += remote_cost.per_tuple * numGroups;
+ /* Add in the eval cost of the locally-checked quals */
+ startup_cost += fpinfo->local_conds_cost.startup;
+ run_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows;
+ }
}
else
{
@@ -5496,6 +5521,19 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
if (!foreign_grouping_ok(root, grouped_rel, extra->havingQual))
return;
+ /*
+ * Compute the selectivity and cost of the local_conds, so we don't have
+ * to do it over again for each path. The best we can do for these
+ * conditions is to estimate selectivity on the basis of local statistics.
+ */
+ fpinfo->local_conds_sel = clauselist_selectivity(root,
+ fpinfo->local_conds,
+ 0,
+ JOIN_INNER,
+ NULL);
+
+ cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
+
/* Estimate the cost of push down */
estimate_path_cost_size(root, grouped_rel, NIL, NIL, &rows,
&width, &startup_cost, &total_cost);
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 6aa9a7f..f963e99 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -807,6 +807,9 @@ create operator class my_op_class for type int using btree family my_op_family a
explain (verbose, costs off)
select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
+-- Update local stats on ft2
+ANALYZE ft2;
+
-- Add into extension
alter extension postgres_fdw add operator class my_op_class using btree;
alter extension postgres_fdw add function my_op_cmp(a int, b int);