From 71409efad8606207beb36878be9e5347c031cd2c Mon Sep 17 00:00:00 2001
From: Etsuro Fujita <efujita@postgresql.org>
Date: Thu, 6 Jun 2019 17:40:22 +0900
Subject: [PATCH] postgres_fdw: Fix costing of pre-sorted foreign paths with
 local stats.

Commit aa09cd242f modified estimate_path_cost_size() so that it reuses
cached costs of a basic foreign path for a given foreign base/join
relation when costing pre-sorted foreign paths for that relation, but it
incorrectly re-computed retrieved_rows, an estimated number of rows
fetched from the remote side, which is needed for costing both the basic
and pre-sorted foreign paths.  To fix, handle retrieved_rows the same
way as the cached costs: store in that relation's fpinfo the
retrieved_rows estimate computed for costing the basic foreign path, and
reuse it when costing the pre-sorted foreign paths.  Also, reuse the
rows/width estimates stored in that relation's fpinfo when costing the
pre-sorted foreign paths, to make the code consistent.

In commit ffab494a4d, to extend the costing mentioned above to the
foreign-grouping case, I made a change to add_foreign_grouping_paths()
to store in a given foreign grouped relation's RelOptInfo the rows
estimate for that relation for reuse in the costing, but this patch
makes that change unnecessary since we already store the row estimate in
that relation's fpinfo, which this patch will reuse when costing a
foreign path for that relation that has the sortClause ordering; remove
the change I made in that commit.

While at it, fix thinko in commit 7012b132d0: in
estimate_path_cost_size(), the width estimate for a given foreign
grouped relation was reset incorrectly in the process of costing a basic
foreign path for that relation with local stats.

Like 449d0a8550, apply this to HEAD only to avoid destabilizing existing
plan choices.

Author: Etsuro Fujita
Discussion: https://postgr.es/m/CAPmGK17jaJLPDEkgnP2VmkOg=5wT8YQ1CqssU8JRpZ_NSE+dqQ@mail.gmail.com
---
 contrib/postgres_fdw/postgres_fdw.c | 95 +++++++++++++++++------------
 contrib/postgres_fdw/postgres_fdw.h |  9 ++-
 2 files changed, 63 insertions(+), 41 deletions(-)

diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 1b09aa5a01..4111968ec4 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -661,10 +661,11 @@ postgresGetForeignRelSize(PlannerInfo *root,
 	cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
 
 	/*
-	 * Set cached relation costs to some negative value, so that we can detect
-	 * when they are set to some sensible costs during one (usually the first)
-	 * of the calls to estimate_path_cost_size().
+	 * Set # of retrieved rows and cached relation costs to some negative
+	 * value, so that we can detect when they are set to some sensible values,
+	 * during one (usually the first) of the calls to estimate_path_cost_size.
 	 */
+	fpinfo->retrieved_rows = -1;
 	fpinfo->rel_startup_cost = -1;
 	fpinfo->rel_total_cost = -1;
 
@@ -2616,7 +2617,6 @@ estimate_path_cost_size(PlannerInfo *root,
 	int			width;
 	Cost		startup_cost;
 	Cost		total_cost;
-	Cost		cpu_per_tuple;
 
 	/* Make sure the core code has set up the relation's reltarget */
 	Assert(foreignrel->reltarget);
@@ -2729,26 +2729,20 @@ estimate_path_cost_size(PlannerInfo *root,
 		 */
 		Assert(param_join_conds == NIL);
 
-		/*
-		 * Use rows/width estimates made by set_baserel_size_estimates() for
-		 * base foreign relations and set_joinrel_size_estimates() for join
-		 * between foreign relations.
-		 */
-		rows = foreignrel->rows;
-		width = foreignrel->reltarget->width;
-
-		/* Back into an estimate of the number of retrieved rows. */
-		retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
-
 		/*
 		 * We will come here again and again with different set of pathkeys or
 		 * additional post-scan/join-processing steps that caller wants to
-		 * cost.  We don't need to calculate the costs of the underlying scan,
-		 * join, or grouping each time.  Instead, use the costs if we have
-		 * cached them already.
+		 * cost.  We don't need to calculate the cost/size estimates for the
+		 * underlying scan, join, or grouping each time.  Instead, use those
+		 * estimates if we have cached them already.
 		 */
 		if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0)
 		{
+			Assert(fpinfo->retrieved_rows >= 0);
+
+			rows = fpinfo->rows;
+			retrieved_rows = fpinfo->retrieved_rows;
+			width = fpinfo->width;
 			startup_cost = fpinfo->rel_startup_cost;
 			run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
 
@@ -2778,6 +2772,10 @@ estimate_path_cost_size(PlannerInfo *root,
 			QualCost	remote_conds_cost;
 			double		nrows;
 
+			/* Use rows/width estimates made by the core code. */
+			rows = foreignrel->rows;
+			width = foreignrel->reltarget->width;
+
 			/* For join we expect inner and outer relations set */
 			Assert(fpinfo->innerrel && fpinfo->outerrel);
 
@@ -2786,7 +2784,12 @@ estimate_path_cost_size(PlannerInfo *root,
 
 			/* Estimate of number of rows in cross product */
 			nrows = fpinfo_i->rows * fpinfo_o->rows;
-			/* Clamp retrieved rows estimate to at most size of cross product */
+
+			/*
+			 * Back into an estimate of the number of retrieved rows.  Just in
+			 * case this is nuts, clamp to at most nrow.
+			 */
+			retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
 			retrieved_rows = Min(retrieved_rows, nrows);
 
 			/*
@@ -2864,9 +2867,8 @@ estimate_path_cost_size(PlannerInfo *root,
 
 			ofpinfo = (PgFdwRelationInfo *) outerrel->fdw_private;
 
-			/* Get rows and width from input rel */
+			/* Get rows from input rel */
 			input_rows = ofpinfo->rows;
-			width = ofpinfo->width;
 
 			/* Collect statistics about aggregates for estimating costs. */
 			MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
@@ -2913,6 +2915,9 @@ estimate_path_cost_size(PlannerInfo *root,
 				rows = retrieved_rows = numGroups;
 			}
 
+			/* Use width estimate made by the core code. */
+			width = foreignrel->reltarget->width;
+
 			/*-----
 			 * Startup cost includes:
 			 *	  1. Startup cost for underneath input relation, adjusted for
@@ -2959,7 +2964,17 @@ estimate_path_cost_size(PlannerInfo *root,
 		}
 		else
 		{
-			/* Clamp retrieved rows estimates to at most foreignrel->tuples. */
+			Cost		cpu_per_tuple;
+
+			/* Use rows/width estimates made by set_baserel_size_estimates. */
+			rows = foreignrel->rows;
+			width = foreignrel->reltarget->width;
+
+			/*
+			 * Back into an estimate of the number of retrieved rows.  Just in
+			 * case this is nuts, clamp to at most foreignrel->tuples.
+			 */
+			retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
 			retrieved_rows = Min(retrieved_rows, foreignrel->tuples);
 
 			/*
@@ -3036,18 +3051,20 @@ estimate_path_cost_size(PlannerInfo *root,
 	}
 
 	/*
-	 * Cache the costs for scans, joins, or groupings without any
-	 * parameterization, pathkeys, or additional post-scan/join-processing
-	 * steps, before adding the costs for transferring data from the foreign
-	 * server.  These costs are useful for costing remote joins involving this
-	 * relation or costing other remote operations for this relation such as
-	 * remote sorts and remote LIMIT restrictions, when the costs can not be
-	 * obtained from the foreign server.  This function will be called at
-	 * least once for every foreign relation without any parameterization,
-	 * pathkeys, or additional post-scan/join-processing steps.
+	 * Cache the retrieved rows and cost estimates for scans, joins, or
+	 * groupings without any parameterization, pathkeys, or additional
+	 * post-scan/join-processing steps, before adding the costs for
+	 * transferring data from the foreign server.  These estimates are useful
+	 * for costing remote joins involving this relation or costing other
+	 * remote operations on this relation such as remote sorts and remote
+	 * LIMIT restrictions, when the costs can not be obtained from the foreign
+	 * server.  This function will be called at least once for every foreign
+	 * relation without any parameterization, pathkeys, or additional
+	 * post-scan/join-processing steps.
 	 */
 	if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL)
 	{
+		fpinfo->retrieved_rows = retrieved_rows;
 		fpinfo->rel_startup_cost = startup_cost;
 		fpinfo->rel_total_cost = total_cost;
 	}
@@ -5150,10 +5167,11 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
 		fpinfo->user = NULL;
 
 	/*
-	 * Set cached relation costs to some negative value, so that we can detect
-	 * when they are set to some sensible costs, during one (usually the
-	 * first) of the calls to estimate_path_cost_size().
+	 * Set # of retrieved rows and cached relation costs to some negative
+	 * value, so that we can detect when they are set to some sensible values,
+	 * during one (usually the first) of the calls to estimate_path_cost_size.
 	 */
+	fpinfo->retrieved_rows = -1;
 	fpinfo->rel_startup_cost = -1;
 	fpinfo->rel_total_cost = -1;
 
@@ -5701,10 +5719,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
 	fpinfo->pushdown_safe = true;
 
 	/*
-	 * Set cached relation costs to some negative value, so that we can detect
-	 * when they are set to some sensible costs, during one (usually the
-	 * first) of the calls to estimate_path_cost_size().
+	 * Set # of retrieved rows and cached relation costs to some negative
+	 * value, so that we can detect when they are set to some sensible values,
+	 * during one (usually the first) of the calls to estimate_path_cost_size.
 	 */
+	fpinfo->retrieved_rows = -1;
 	fpinfo->rel_startup_cost = -1;
 	fpinfo->rel_total_cost = -1;
 
@@ -5846,8 +5865,6 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	fpinfo->startup_cost = startup_cost;
 	fpinfo->total_cost = total_cost;
 
-	grouped_rel->rows = fpinfo->rows;
-
 	/* Create and add foreign path to the grouping relation. */
 	grouppath = create_foreign_upper_path(root,
 										  grouped_rel,
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 30b6502bc0..35545e1831 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -59,12 +59,17 @@ typedef struct PgFdwRelationInfo
 	/* Selectivity of join conditions */
 	Selectivity joinclause_sel;
 
-	/* Estimated size and cost for a scan or join. */
+	/* Estimated size and cost for a scan, join, or grouping/aggregation. */
 	double		rows;
 	int			width;
 	Cost		startup_cost;
 	Cost		total_cost;
-	/* Costs excluding costs for transferring data from the foreign server */
+	/*
+	 * Estimated number of rows fetched from the foreign server, and costs
+	 * excluding costs for transferring those rows from the foreign server.
+	 * These are only used by estimate_path_cost_size().
+	 */
+	double		retrieved_rows;
 	Cost		rel_startup_cost;
 	Cost		rel_total_cost;
 
-- 
2.19.2

