On Fri, Oct 23, 2015 at 2:43 AM, Robert Haas <robertmh...@gmail.com> wrote:

> On Wed, Oct 21, 2015 at 5:23 AM, Ashutosh Bapat
> <ashutosh.ba...@enterprisedb.com> wrote:
> > Increasing the sorting cost factor (when use_remote_estimates = false)
> from
> > 1.1 to 1.2 makes the difference disappear.
> >
> > Since the startup costs for postgres_fdw are large portion of total cost,
> > extra 10% of rest of the cost is comparable to 1% fuzzy limit. IMO, we
> > shouldn't bother too much about it as the path costs are not much
> different.
>
> My feeling is that cranking the sorting cost factor up to 20-25% would
> be a good idea, just so we have less unnecessary plan churn.  I dunno
> if sorting always costs that much, but if a 10% cost overhead is
> really 1% because it only applies to a fraction of the cost, I don't
> think that's good.  The whole point was to pick something large enough
> that we wouldn't take the sorted path unless we will benefit from the
> sort, and clearly that's not what happened here.
>
>
PFA patch with the default multiplication factor for sort bumped up to 1.2.


> --
> Robert Haas
> EnterpriseDB: http://www.enterprisedb.com
> The Enterprise PostgreSQL Company
>



-- 
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 697de60..cb5c3ae 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -186,23 +186,26 @@ is_foreign_expr(PlannerInfo *root,
 	 * Check that the expression consists of nodes that are safe to execute
 	 * remotely.
 	 */
 	glob_cxt.root = root;
 	glob_cxt.foreignrel = baserel;
 	loc_cxt.collation = InvalidOid;
 	loc_cxt.state = FDW_COLLATE_NONE;
 	if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt))
 		return false;
 
-	/* Expressions examined here should be boolean, ie noncollatable */
-	Assert(loc_cxt.collation == InvalidOid);
-	Assert(loc_cxt.state == FDW_COLLATE_NONE);
+	/*
+	 * The collation of the expression should be none or originate from a
+	 * foreign var.
+	 */
+	Assert(loc_cxt.state == FDW_COLLATE_NONE ||
+			loc_cxt.state == FDW_COLLATE_SAFE);
 
 	/*
 	 * An expression which includes any mutable functions can't be sent over
 	 * because its result is not stable.  For example, sending now() remote
 	 * side could cause confusion from clock offsets.  Future versions might
 	 * be able to make this choice with more granularity.  (We check this last
 	 * because it requires a lot of expensive catalog lookups.)
 	 */
 	if (contain_mutable_functions((Node *) expr))
 		return false;
@@ -1870,10 +1873,57 @@ printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod,
  */
 static void
 printRemotePlaceholder(Oid paramtype, int32 paramtypmod,
 					   deparse_expr_cxt *context)
 {
 	StringInfo	buf = context->buf;
 	char	   *ptypename = format_type_with_typemod(paramtype, paramtypmod);
 
 	appendStringInfo(buf, "((SELECT null::%s)::%s)", ptypename, ptypename);
 }
+
+/*
+ * Deparse ORDER BY clause according to the given pathkeys for given base
+ * relation. From given pathkeys expressions belonging entirely to the given
+ * base relation are obtained and deparsed.
+ */
+void
+appendOrderByClause(StringInfo buf, PlannerInfo *root, RelOptInfo *baserel,
+					List *pathkeys)
+{
+	ListCell			*lcell;
+	deparse_expr_cxt	context;
+	int					nestlevel;
+	char				*delim = " ";
+
+	/* Set up context struct for recursion */
+	context.root = root;
+	context.foreignrel = baserel;
+	context.buf = buf;
+	context.params_list = NULL;
+
+	/* Make sure any constants in the exprs are printed portably */
+	nestlevel = set_transmission_modes();
+
+	appendStringInfo(buf, " ORDER BY");
+	foreach(lcell, pathkeys)
+	{
+		PathKey				*pathkey = lfirst(lcell);
+		Expr				*em_expr;
+
+		em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel);
+		Assert(em_expr != NULL);
+
+		appendStringInfoString(buf, delim);
+		deparseExpr(em_expr, &context);
+		if (pathkey->pk_strategy == BTLessStrategyNumber)
+			appendStringInfoString(buf, " ASC");
+		else
+			appendStringInfoString(buf, " DESC");
+
+		if (pathkey->pk_nulls_first)
+			appendStringInfoString(buf, " NULLS FIRST");
+
+		delim = ", ";
+	}
+	reset_transmission_modes(nestlevel);
+}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 65ea6e8..58bf76c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -127,86 +127,82 @@ ALTER FOREIGN TABLE ft2 ALTER COLUMN c1 OPTIONS (column_name 'C 1');
 (2 rows)
 
 -- Now we should be able to run ANALYZE.
 -- To exercise multiple code paths, we use local stats on ft1
 -- and remote-estimate mode on ft2.
 ANALYZE ft1;
 ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
 -- ===================================================================
 -- simple queries
 -- ===================================================================
--- single table, with/without alias
+-- single table without alias
 EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
-           QUERY PLAN            
----------------------------------
+        QUERY PLAN         
+---------------------------
  Limit
-   ->  Sort
-         Sort Key: c3, c1
-         ->  Foreign Scan on ft1
-(4 rows)
+   ->  Foreign Scan on ft1
+(2 rows)
 
 SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
  c1  | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  
 -----+----+-------+------------------------------+--------------------------+----+------------+-----
  101 |  1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1  | 1          | foo
  102 |  2 | 00102 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2  | 2          | foo
  103 |  3 | 00103 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3  | 3          | foo
  104 |  4 | 00104 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4  | 4          | foo
  105 |  5 | 00105 | Tue Jan 06 00:00:00 1970 PST | Tue Jan 06 00:00:00 1970 | 5  | 5          | foo
  106 |  6 | 00106 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6  | 6          | foo
  107 |  7 | 00107 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7  | 7          | foo
  108 |  8 | 00108 | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8  | 8          | foo
  109 |  9 | 00109 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9  | 9          | foo
  110 |  0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0  | 0          | foo
 (10 rows)
 
-EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+-- single table with alias - also test that tableoid sort is not pushed to remote side
+EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
                                      QUERY PLAN                                      
 -------------------------------------------------------------------------------------
  Limit
-   Output: c1, c2, c3, c4, c5, c6, c7, c8
+   Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
    ->  Sort
-         Output: c1, c2, c3, c4, c5, c6, c7, c8
-         Sort Key: t1.c3, t1.c1
+         Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
+         Sort Key: t1.c3, t1.c1, t1.tableoid
          ->  Foreign Scan on public.ft1 t1
-               Output: c1, c2, c3, c4, c5, c6, c7, c8
+               Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
                Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
 (8 rows)
 
-SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
  c1  | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  
 -----+----+-------+------------------------------+--------------------------+----+------------+-----
  101 |  1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1  | 1          | foo
  102 |  2 | 00102 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2  | 2          | foo
  103 |  3 | 00103 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3  | 3          | foo
  104 |  4 | 00104 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4  | 4          | foo
  105 |  5 | 00105 | Tue Jan 06 00:00:00 1970 PST | Tue Jan 06 00:00:00 1970 | 5  | 5          | foo
  106 |  6 | 00106 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6  | 6          | foo
  107 |  7 | 00107 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7  | 7          | foo
  108 |  8 | 00108 | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8  | 8          | foo
  109 |  9 | 00109 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9  | 9          | foo
  110 |  0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0  | 0          | foo
 (10 rows)
 
 -- whole-row reference
 EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                     QUERY PLAN                                      
--------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                
+----------------------------------------------------------------------------------------------------------
  Limit
    Output: t1.*, c3, c1
-   ->  Sort
+   ->  Foreign Scan on public.ft1 t1
          Output: t1.*, c3, c1
-         Sort Key: t1.c3, t1.c1
-         ->  Foreign Scan on public.ft1 t1
-               Output: t1.*, c3, c1
-               Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
-(8 rows)
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c3 ASC, "C 1" ASC
+(5 rows)
 
 SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
                                              t1                                             
 --------------------------------------------------------------------------------------------
  (101,1,00101,"Fri Jan 02 00:00:00 1970 PST","Fri Jan 02 00:00:00 1970",1,"1         ",foo)
  (102,2,00102,"Sat Jan 03 00:00:00 1970 PST","Sat Jan 03 00:00:00 1970",2,"2         ",foo)
  (103,3,00103,"Sun Jan 04 00:00:00 1970 PST","Sun Jan 04 00:00:00 1970",3,"3         ",foo)
  (104,4,00104,"Mon Jan 05 00:00:00 1970 PST","Mon Jan 05 00:00:00 1970",4,"4         ",foo)
  (105,5,00105,"Tue Jan 06 00:00:00 1970 PST","Tue Jan 06 00:00:00 1970",5,"5         ",foo)
  (106,6,00106,"Wed Jan 07 00:00:00 1970 PST","Wed Jan 07 00:00:00 1970",6,"6         ",foo)
@@ -643,20 +639,33 @@ SELECT * FROM ft1 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft2 WHERE c1 < 5));
 
 SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
  c1 | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  
 ----+----+-------+------------------------------+--------------------------+----+------------+-----
   1 |  1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1  | 1          | foo
   2 |  2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2  | 2          | foo
   3 |  3 | 00003 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3  | 3          | foo
   4 |  4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4  | 4          | foo
 (4 rows)
 
+-- we should not push order by clause with volatile expressions
+EXPLAIN (VERBOSE, COSTS false)
+	SELECT * FROM ft2 ORDER BY ft2.c1, random();
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Sort
+   Output: c1, c2, c3, c4, c5, c6, c7, c8, (random())
+   Sort Key: ft2.c1, (random())
+   ->  Foreign Scan on public.ft2
+         Output: c1, c2, c3, c4, c5, c6, c7, c8, random()
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(6 rows)
+
 -- ===================================================================
 -- parameterized queries
 -- ===================================================================
 -- simple join
 PREPARE st1(int, int) AS SELECT t1.c3, t2.c3 FROM ft1 t1, ft2 t2 WHERE t1.c1 = $1 AND t2.c1 = $2;
 EXPLAIN (VERBOSE, COSTS false) EXECUTE st1(1, 2);
                              QUERY PLAN                             
 --------------------------------------------------------------------
  Nested Loop
    Output: t1.c3, t2.c3
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 1902f1f..4ff6367 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -40,20 +40,23 @@
 #include "utils/sampling.h"
 
 PG_MODULE_MAGIC;
 
 /* Default CPU cost to start up a foreign query. */
 #define DEFAULT_FDW_STARTUP_COST	100.0
 
 /* Default CPU cost to process 1 row (above and beyond cpu_tuple_cost). */
 #define DEFAULT_FDW_TUPLE_COST		0.01
 
+/* If no remote estimates, assume a sort costs 10% extra */
+#define DEFAULT_FDW_SORT_MULTIPLIER 1.2
+
 /*
  * FDW-specific planner information kept in RelOptInfo.fdw_private for a
  * foreign table.  This information is collected by postgresGetForeignRelSize.
  */
 typedef struct PgFdwRelationInfo
 {
 	/* baserestrictinfo clauses, broken down into safe and unsafe subsets. */
 	List	   *remote_conds;
 	List	   *local_conds;
 
@@ -289,20 +292,21 @@ static bool postgresAnalyzeForeignTable(Relation relation,
 							BlockNumber *totalpages);
 static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt,
 							Oid serverOid);
 
 /*
  * Helper functions
  */
 static void estimate_path_cost_size(PlannerInfo *root,
 						RelOptInfo *baserel,
 						List *join_conds,
+						List *pathkeys,
 						double *p_rows, int *p_width,
 						Cost *p_startup_cost, Cost *p_total_cost);
 static void get_remote_estimate(const char *sql,
 					PGconn *conn,
 					double *rows,
 					int *width,
 					Cost *startup_cost,
 					Cost *total_cost);
 static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
 						  EquivalenceClass *ec, EquivalenceMember *em,
@@ -490,21 +494,21 @@ postgresGetForeignRelSize(PlannerInfo *root,
 	 * average row width.  Otherwise, estimate using whatever statistics we
 	 * have locally, in a way similar to ordinary tables.
 	 */
 	if (fpinfo->use_remote_estimate)
 	{
 		/*
 		 * Get cost/size estimates with help of remote server.  Save the
 		 * values in fpinfo so we don't need to do it again to generate the
 		 * basic foreign path.
 		 */
-		estimate_path_cost_size(root, baserel, NIL,
+		estimate_path_cost_size(root, baserel, NIL, NIL,
 								&fpinfo->rows, &fpinfo->width,
 								&fpinfo->startup_cost, &fpinfo->total_cost);
 
 		/* Report estimated baserel size to planner. */
 		baserel->rows = fpinfo->rows;
 		baserel->width = fpinfo->width;
 	}
 	else
 	{
 		/*
@@ -520,57 +524,112 @@ postgresGetForeignRelSize(PlannerInfo *root,
 		{
 			baserel->pages = 10;
 			baserel->tuples =
 				(10 * BLCKSZ) / (baserel->width + MAXALIGN(SizeofHeapTupleHeader));
 		}
 
 		/* Estimate baserel size as best we can with local statistics. */
 		set_baserel_size_estimates(root, baserel);
 
 		/* Fill in basically-bogus cost estimates for use later. */
-		estimate_path_cost_size(root, baserel, NIL,
+		estimate_path_cost_size(root, baserel, NIL, NIL,
 								&fpinfo->rows, &fpinfo->width,
 								&fpinfo->startup_cost, &fpinfo->total_cost);
 	}
 }
 
 /*
  * postgresGetForeignPaths
  *		Create possible scan paths for a scan on the foreign table
  */
 static void
 postgresGetForeignPaths(PlannerInfo *root,
 						RelOptInfo *baserel,
 						Oid foreigntableid)
 {
 	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
 	ForeignPath *path;
 	List	   *ppi_list;
 	ListCell   *lc;
+	List	   *usable_pathkeys = NIL;
 
 	/*
 	 * Create simplest ForeignScan path node and add it to baserel.  This path
 	 * corresponds to SeqScan path of regular tables (though depending on what
 	 * baserestrict conditions we were able to send to remote, there might
 	 * actually be an indexscan happening there).  We already did all the work
 	 * to estimate cost and size of this path.
 	 */
 	path = create_foreignscan_path(root, baserel,
 								   fpinfo->rows,
 								   fpinfo->startup_cost,
 								   fpinfo->total_cost,
 								   NIL, /* no pathkeys */
 								   NULL,		/* no outer rel either */
 								   NIL);		/* no fdw_private list */
 	add_path(baserel, (Path *) path);
 
 	/*
+	 * Determine whether we can potentially push query pathkeys to the remote
+	 * side, avoiding a local sort.
+	 */
+	foreach(lc, root->query_pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+		Expr	   *em_expr;
+
+		/*
+		 * is_foreign_expr would detect volatile expressions as well, but
+		 * ec_has_volatile saves some cycles.
+		 */
+		if (!pathkey_ec->ec_has_volatile &&
+			(em_expr = find_em_expr_for_rel(pathkey_ec, baserel)) &&
+			is_foreign_expr(root, baserel, em_expr))
+			usable_pathkeys = lappend(usable_pathkeys, pathkey);
+		else
+		{
+			/*
+			 * The planner and executor don't have any clever strategy for
+			 * taking data sorted by a prefix of the query's pathkeys and
+			 * getting it to be sorted by all of those pathekeys.  We'll just
+			 * end up resorting the entire data set.  So, unless we can push
+			 * down all of the query pathkeys, forget it.
+			 */
+			list_free(usable_pathkeys);
+			usable_pathkeys = NIL;
+			break;
+		}
+	}
+
+	/* Create a path with useful pathkeys, if we found one. */
+	if (usable_pathkeys != NULL)
+	{
+		double		rows;
+		int			width;
+		Cost		startup_cost;
+		Cost		total_cost;
+
+		estimate_path_cost_size(root, baserel, NIL, usable_pathkeys,
+								&rows, &width, &startup_cost, &total_cost);
+
+		add_path(baserel, (Path *)
+				 create_foreignscan_path(root, baserel,
+										 rows,
+										 startup_cost,
+										 total_cost,
+										 usable_pathkeys,
+										 NULL,
+										 NIL));
+	}
+
+	/*
 	 * If we're not using remote estimates, stop here.  We have no way to
 	 * estimate whether any join clauses would be worth sending across, so
 	 * don't bother building parameterized paths.
 	 */
 	if (!fpinfo->use_remote_estimate)
 		return;
 
 	/*
 	 * Thumb through all join clauses for the rel to identify which outer
 	 * relations could supply one or more safe-to-send-to-remote join clauses.
@@ -703,21 +762,21 @@ postgresGetForeignPaths(PlannerInfo *root,
 	foreach(lc, ppi_list)
 	{
 		ParamPathInfo *param_info = (ParamPathInfo *) lfirst(lc);
 		double		rows;
 		int			width;
 		Cost		startup_cost;
 		Cost		total_cost;
 
 		/* Get a cost estimate from the remote */
 		estimate_path_cost_size(root, baserel,
-								param_info->ppi_clauses,
+								param_info->ppi_clauses, NIL,
 								&rows, &width,
 								&startup_cost, &total_cost);
 
 		/*
 		 * ppi_rows currently won't get looked at by anything, but still we
 		 * may as well ensure that it matches our idea of the rowcount.
 		 */
 		param_info->ppi_rows = rows;
 
 		/* Make the path */
@@ -804,20 +863,24 @@ postgresGetForeignPlan(PlannerInfo *root,
 	 * Build the query string to be sent for execution, and identify
 	 * expressions to be sent as parameters.
 	 */
 	initStringInfo(&sql);
 	deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used,
 					 &retrieved_attrs);
 	if (remote_conds)
 		appendWhereClause(&sql, root, baserel, remote_conds,
 						  true, &params_list);
 
+	/* Add ORDER BY clause if we found any useful pathkeys */
+	if (best_path->path.pathkeys)
+		appendOrderByClause(&sql, root, baserel, best_path->path.pathkeys);
+
 	/*
 	 * Add FOR UPDATE/SHARE if appropriate.  We apply locking during the
 	 * initial row fetch, rather than later on as is done for local tables.
 	 * The extra roundtrips involved in trying to duplicate the local
 	 * semantics exactly don't seem worthwhile (see also comments for
 	 * RowMarkType).
 	 *
 	 * Note: because we actually run the query as a cursor, this assumes that
 	 * DECLARE CURSOR ... FOR UPDATE is supported, which it isn't before 8.3.
 	 */
@@ -1721,20 +1784,21 @@ postgresExplainForeignModify(ModifyTableState *mtstate,
  * estimate_path_cost_size
  *		Get cost and size estimates for a foreign scan
  *
  * We assume that all the baserestrictinfo clauses will be applied, plus
  * any join clauses listed in join_conds.
  */
 static void
 estimate_path_cost_size(PlannerInfo *root,
 						RelOptInfo *baserel,
 						List *join_conds,
+						List *pathkeys,
 						double *p_rows, int *p_width,
 						Cost *p_startup_cost, Cost *p_total_cost)
 {
 	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
 	double		rows;
 	double		retrieved_rows;
 	int			width;
 	Cost		startup_cost;
 	Cost		total_cost;
 	Cost		run_cost;
@@ -1773,20 +1837,23 @@ estimate_path_cost_size(PlannerInfo *root,
 		appendStringInfoString(&sql, "EXPLAIN ");
 		deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used,
 						 &retrieved_attrs);
 		if (fpinfo->remote_conds)
 			appendWhereClause(&sql, root, baserel, fpinfo->remote_conds,
 							  true, NULL);
 		if (remote_join_conds)
 			appendWhereClause(&sql, root, baserel, remote_join_conds,
 							  (fpinfo->remote_conds == NIL), NULL);
 
+		if (pathkeys)
+			appendOrderByClause(&sql, root, baserel, pathkeys);
+
 		/* Get the remote estimate */
 		conn = GetConnection(fpinfo->server, fpinfo->user, false);
 		get_remote_estimate(sql.data, conn, &rows, &width,
 							&startup_cost, &total_cost);
 		ReleaseConnection(conn);
 
 		retrieved_rows = rows;
 
 		/* Factor in the selectivity of the locally-checked quals */
 		local_sel = clauselist_selectivity(root,
@@ -1830,20 +1897,35 @@ estimate_path_cost_size(PlannerInfo *root,
 		 * too.
 		 */
 		startup_cost = 0;
 		run_cost = 0;
 		run_cost += seq_page_cost * baserel->pages;
 
 		startup_cost += baserel->baserestrictcost.startup;
 		cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 		run_cost += cpu_per_tuple * baserel->tuples;
 
+		/*
+		 * Without remote estimates, we have no real way to estimate the cost
+		 * of generating sorted output.  It could be free if the query plan
+		 * the remote side would have chosen generates properly-sorted output
+		 * anyway, but in most cases it will cost something.  Estimate a value
+		 * high enough that we won't pick the sorted path when the ordering
+		 * isn't locally useful, but low enough that we'll err on the side of
+		 * pushing down the ORDER BY clause when it's useful to do so.
+		 */
+		if (pathkeys != NIL)
+		{
+			startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+			run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+		}
+
 		total_cost = startup_cost + run_cost;
 	}
 
 	/*
 	 * Add some additional cost factors to account for connection overhead
 	 * (fdw_startup_cost), transferring data across the network
 	 * (fdw_tuple_cost per retrieved row), and local manipulation of the data
 	 * (cpu_tuple_cost per retrieved row).
 	 */
 	startup_cost += fpinfo->fdw_startup_cost;
@@ -2995,10 +3077,38 @@ static void
 conversion_error_callback(void *arg)
 {
 	ConversionLocation *errpos = (ConversionLocation *) arg;
 	TupleDesc	tupdesc = RelationGetDescr(errpos->rel);
 
 	if (errpos->cur_attno > 0 && errpos->cur_attno <= tupdesc->natts)
 		errcontext("column \"%s\" of foreign table \"%s\"",
 				   NameStr(tupdesc->attrs[errpos->cur_attno - 1]->attname),
 				   RelationGetRelationName(errpos->rel));
 }
+
+/*
+ * Find an equivalence class member expression, all of whose Vars, come from
+ * the indicated relation.
+ */
+extern Expr *
+find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
+{
+	ListCell   *lc_em;
+
+	foreach(lc_em, ec->ec_members)
+	{
+		EquivalenceMember *em = lfirst(lc_em);
+
+		if (bms_equal(em->em_relids, rel->relids))
+		{
+			/*
+			 * If there is more than one equivalence member whose Vars are
+			 * taken entirely from this relation, we'll be content to choose
+			 * any one of those.
+			 */
+			return em->em_expr;
+		}
+	}
+
+	/* We didn't find any suitable equivalence class expression */
+	return NULL;
+}
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 3835ddb..8956cd2 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -67,12 +67,15 @@ extern void deparseUpdateSql(StringInfo buf, PlannerInfo *root,
 				 List *targetAttrs, List *returningList,
 				 List **retrieved_attrs);
 extern void deparseDeleteSql(StringInfo buf, PlannerInfo *root,
 				 Index rtindex, Relation rel,
 				 List *returningList,
 				 List **retrieved_attrs);
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 				  List **retrieved_attrs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
+extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern void appendOrderByClause(StringInfo buf, PlannerInfo *root,
+								RelOptInfo *baserel, List *pathkeys);
 
 #endif   /* POSTGRES_FDW_H */
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 11160f8..861464c 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -131,25 +131,26 @@ ALTER FOREIGN TABLE ft2 ALTER COLUMN c1 OPTIONS (column_name 'C 1');
 
 -- Now we should be able to run ANALYZE.
 -- To exercise multiple code paths, we use local stats on ft1
 -- and remote-estimate mode on ft2.
 ANALYZE ft1;
 ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
 
 -- ===================================================================
 -- simple queries
 -- ===================================================================
--- single table, with/without alias
+-- single table without alias
 EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
 SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
-EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+-- single table with alias - also test that tableoid sort is not pushed to remote side
+EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
+SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
 -- whole-row reference
 EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
 SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
 -- empty result
 SELECT * FROM ft1 WHERE false;
 -- with WHERE clause
 EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE t1.c1 = 101 AND t1.c6 = '1' AND t1.c7 >= '1';
 SELECT * FROM ft1 t1 WHERE t1.c1 = 101 AND t1.c6 = '1' AND t1.c7 >= '1';
 -- with FOR UPDATE/SHARE
 EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c1 = 101 FOR UPDATE;
@@ -207,20 +208,23 @@ EXPLAIN (VERBOSE, COSTS false)
 SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2;
 -- check both safe and unsafe join conditions
 EXPLAIN (VERBOSE, COSTS false)
   SELECT * FROM ft2 a, ft2 b
   WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7);
 SELECT * FROM ft2 a, ft2 b
 WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7);
 -- bug before 9.3.5 due to sloppy handling of remote-estimate parameters
 SELECT * FROM ft1 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft2 WHERE c1 < 5));
 SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
+-- we should not push order by clause with volatile expressions
+EXPLAIN (VERBOSE, COSTS false)
+	SELECT * FROM ft2 ORDER BY ft2.c1, random();
 
 -- ===================================================================
 -- parameterized queries
 -- ===================================================================
 -- simple join
 PREPARE st1(int, int) AS SELECT t1.c3, t2.c3 FROM ft1 t1, ft2 t2 WHERE t1.c1 = $1 AND t2.c1 = $2;
 EXPLAIN (VERBOSE, COSTS false) EXECUTE st1(1, 2);
 EXECUTE st1(1, 1);
 EXECUTE st1(101, 101);
 -- subquery using stable function (can't be sent to remote)
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to