If there is a collate clause in the ORDER BY, the server crashes with
assertion
+    Assert(loc_cxt.state == FDW_COLLATE_NONE ||
+            loc_cxt.state == FDW_COLLATE_SAFE);


The assertion is fine as long as is_foreign_expr() tests only boolean
expressions (appearing in quals). This patch uses the function to test an
expression appearing in ORDER BY clause, which need not be boolean.
Attached patch removed the assertion and instead makes the function return
false, when the walker deems collation of the expression unsafe. The walker
can not return false when it encounter unsafe expression since the subtree
it's examining might be part of an expression which does not use the
collation ultimately.

On Wed, Oct 28, 2015 at 11:51 AM, Ashutosh Bapat <
ashutosh.ba...@enterprisedb.com> wrote:

> On Tue, Oct 27, 2015 at 6:44 PM, Fabrízio de Royes Mello <
> fabriziome...@gmail.com> wrote:
>
>>
>>
>> On Tue, Oct 27, 2015 at 5:26 AM, Ashutosh Bapat <
>> ashutosh.ba...@enterprisedb.com> wrote:
>> >
>> >
>> >
>> > On Fri, Oct 23, 2015 at 2:43 AM, Robert Haas <robertmh...@gmail.com>
>> wrote:
>> >>
>> >> On Wed, Oct 21, 2015 at 5:23 AM, Ashutosh Bapat
>> >> <ashutosh.ba...@enterprisedb.com> wrote:
>> >> > Increasing the sorting cost factor (when use_remote_estimates =
>> false) from
>> >> > 1.1 to 1.2 makes the difference disappear.
>> >> >
>> >> > Since the startup costs for postgres_fdw are large portion of total
>> cost,
>> >> > extra 10% of rest of the cost is comparable to 1% fuzzy limit. IMO,
>> we
>> >> > shouldn't bother too much about it as the path costs are not much
>> different.
>> >>
>> >> My feeling is that cranking the sorting cost factor up to 20-25% would
>> >> be a good idea, just so we have less unnecessary plan churn.  I dunno
>> >> if sorting always costs that much, but if a 10% cost overhead is
>> >> really 1% because it only applies to a fraction of the cost, I don't
>> >> think that's good.  The whole point was to pick something large enough
>> >> that we wouldn't take the sorted path unless we will benefit from the
>> >> sort, and clearly that's not what happened here.
>> >>
>> >
>> > PFA patch with the default multiplication factor for sort bumped up to
>> 1.2.
>> >
>>
>> +/* If no remote estimates, assume a sort costs 10% extra */
>> +#define DEFAULT_FDW_SORT_MULTIPLIER 1.2
>>
>> The above comment should not be 20%?
>>
>> Ah! Here's patch with comment fixed.
>
> --
> Best Wishes,
> Ashutosh Bapat
> EnterpriseDB Corporation
> The Postgres Database Company
>



-- 
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 697de60..3cb728f 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -186,23 +186,26 @@ is_foreign_expr(PlannerInfo *root,
 	 * Check that the expression consists of nodes that are safe to execute
 	 * remotely.
 	 */
 	glob_cxt.root = root;
 	glob_cxt.foreignrel = baserel;
 	loc_cxt.collation = InvalidOid;
 	loc_cxt.state = FDW_COLLATE_NONE;
 	if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt))
 		return false;
 
-	/* Expressions examined here should be boolean, ie noncollatable */
-	Assert(loc_cxt.collation == InvalidOid);
-	Assert(loc_cxt.state == FDW_COLLATE_NONE);
+	/*
+	 * If the expression has a valid collation that does not arise from a
+	 * foreign var, the expression can not be sent over.
+	 */
+	if (loc_cxt.state == FDW_COLLATE_UNSAFE)
+		return false;
 
 	/*
 	 * An expression which includes any mutable functions can't be sent over
 	 * because its result is not stable.  For example, sending now() remote
 	 * side could cause confusion from clock offsets.  Future versions might
 	 * be able to make this choice with more granularity.  (We check this last
 	 * because it requires a lot of expensive catalog lookups.)
 	 */
 	if (contain_mutable_functions((Node *) expr))
 		return false;
@@ -1870,10 +1873,57 @@ printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod,
  */
 static void
 printRemotePlaceholder(Oid paramtype, int32 paramtypmod,
 					   deparse_expr_cxt *context)
 {
 	StringInfo	buf = context->buf;
 	char	   *ptypename = format_type_with_typemod(paramtype, paramtypmod);
 
 	appendStringInfo(buf, "((SELECT null::%s)::%s)", ptypename, ptypename);
 }
+
+/*
+ * Deparse ORDER BY clause according to the given pathkeys for given base
+ * relation. From given pathkeys expressions belonging entirely to the given
+ * base relation are obtained and deparsed.
+ */
+void
+appendOrderByClause(StringInfo buf, PlannerInfo *root, RelOptInfo *baserel,
+					List *pathkeys)
+{
+	ListCell			*lcell;
+	deparse_expr_cxt	context;
+	int					nestlevel;
+	char				*delim = " ";
+
+	/* Set up context struct for recursion */
+	context.root = root;
+	context.foreignrel = baserel;
+	context.buf = buf;
+	context.params_list = NULL;
+
+	/* Make sure any constants in the exprs are printed portably */
+	nestlevel = set_transmission_modes();
+
+	appendStringInfo(buf, " ORDER BY");
+	foreach(lcell, pathkeys)
+	{
+		PathKey				*pathkey = lfirst(lcell);
+		Expr				*em_expr;
+
+		em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel);
+		Assert(em_expr != NULL);
+
+		appendStringInfoString(buf, delim);
+		deparseExpr(em_expr, &context);
+		if (pathkey->pk_strategy == BTLessStrategyNumber)
+			appendStringInfoString(buf, " ASC");
+		else
+			appendStringInfoString(buf, " DESC");
+
+		if (pathkey->pk_nulls_first)
+			appendStringInfoString(buf, " NULLS FIRST");
+
+		delim = ", ";
+	}
+	reset_transmission_modes(nestlevel);
+}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 65ea6e8..0159bf5 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -127,86 +127,82 @@ ALTER FOREIGN TABLE ft2 ALTER COLUMN c1 OPTIONS (column_name 'C 1');
 (2 rows)
 
 -- Now we should be able to run ANALYZE.
 -- To exercise multiple code paths, we use local stats on ft1
 -- and remote-estimate mode on ft2.
 ANALYZE ft1;
 ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
 -- ===================================================================
 -- simple queries
 -- ===================================================================
--- single table, with/without alias
+-- single table without alias
 EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
-           QUERY PLAN            
----------------------------------
+        QUERY PLAN         
+---------------------------
  Limit
-   ->  Sort
-         Sort Key: c3, c1
-         ->  Foreign Scan on ft1
-(4 rows)
+   ->  Foreign Scan on ft1
+(2 rows)
 
 SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
  c1  | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  
 -----+----+-------+------------------------------+--------------------------+----+------------+-----
  101 |  1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1  | 1          | foo
  102 |  2 | 00102 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2  | 2          | foo
  103 |  3 | 00103 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3  | 3          | foo
  104 |  4 | 00104 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4  | 4          | foo
  105 |  5 | 00105 | Tue Jan 06 00:00:00 1970 PST | Tue Jan 06 00:00:00 1970 | 5  | 5          | foo
  106 |  6 | 00106 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6  | 6          | foo
  107 |  7 | 00107 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7  | 7          | foo
  108 |  8 | 00108 | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8  | 8          | foo
  109 |  9 | 00109 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9  | 9          | foo
  110 |  0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0  | 0          | foo
 (10 rows)
 
-EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+-- single table with alias - also test that tableoid sort is not pushed to remote side
+EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
                                      QUERY PLAN                                      
 -------------------------------------------------------------------------------------
  Limit
-   Output: c1, c2, c3, c4, c5, c6, c7, c8
+   Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
    ->  Sort
-         Output: c1, c2, c3, c4, c5, c6, c7, c8
-         Sort Key: t1.c3, t1.c1
+         Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
+         Sort Key: t1.c3, t1.c1, t1.tableoid
          ->  Foreign Scan on public.ft1 t1
-               Output: c1, c2, c3, c4, c5, c6, c7, c8
+               Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
                Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
 (8 rows)
 
-SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
  c1  | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  
 -----+----+-------+------------------------------+--------------------------+----+------------+-----
  101 |  1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1  | 1          | foo
  102 |  2 | 00102 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2  | 2          | foo
  103 |  3 | 00103 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3  | 3          | foo
  104 |  4 | 00104 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4  | 4          | foo
  105 |  5 | 00105 | Tue Jan 06 00:00:00 1970 PST | Tue Jan 06 00:00:00 1970 | 5  | 5          | foo
  106 |  6 | 00106 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6  | 6          | foo
  107 |  7 | 00107 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7  | 7          | foo
  108 |  8 | 00108 | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8  | 8          | foo
  109 |  9 | 00109 | Sat Jan 10 00:00:00 1970 PST | Sat Jan 10 00:00:00 1970 | 9  | 9          | foo
  110 |  0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0  | 0          | foo
 (10 rows)
 
 -- whole-row reference
 EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                     QUERY PLAN                                      
--------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                
+----------------------------------------------------------------------------------------------------------
  Limit
    Output: t1.*, c3, c1
-   ->  Sort
+   ->  Foreign Scan on public.ft1 t1
          Output: t1.*, c3, c1
-         Sort Key: t1.c3, t1.c1
-         ->  Foreign Scan on public.ft1 t1
-               Output: t1.*, c3, c1
-               Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
-(8 rows)
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c3 ASC, "C 1" ASC
+(5 rows)
 
 SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
                                              t1                                             
 --------------------------------------------------------------------------------------------
  (101,1,00101,"Fri Jan 02 00:00:00 1970 PST","Fri Jan 02 00:00:00 1970",1,"1         ",foo)
  (102,2,00102,"Sat Jan 03 00:00:00 1970 PST","Sat Jan 03 00:00:00 1970",2,"2         ",foo)
  (103,3,00103,"Sun Jan 04 00:00:00 1970 PST","Sun Jan 04 00:00:00 1970",3,"3         ",foo)
  (104,4,00104,"Mon Jan 05 00:00:00 1970 PST","Mon Jan 05 00:00:00 1970",4,"4         ",foo)
  (105,5,00105,"Tue Jan 06 00:00:00 1970 PST","Tue Jan 06 00:00:00 1970",5,"5         ",foo)
  (106,6,00106,"Wed Jan 07 00:00:00 1970 PST","Wed Jan 07 00:00:00 1970",6,"6         ",foo)
@@ -643,20 +639,46 @@ SELECT * FROM ft1 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft2 WHERE c1 < 5));
 
 SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
  c1 | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  
 ----+----+-------+------------------------------+--------------------------+----+------------+-----
   1 |  1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1  | 1          | foo
   2 |  2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2  | 2          | foo
   3 |  3 | 00003 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3  | 3          | foo
   4 |  4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4  | 4          | foo
 (4 rows)
 
+-- we should not push order by clause with volatile expressions or unsafe
+-- collations
+EXPLAIN (VERBOSE, COSTS false)
+	SELECT * FROM ft2 ORDER BY ft2.c1, random();
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Sort
+   Output: c1, c2, c3, c4, c5, c6, c7, c8, (random())
+   Sort Key: ft2.c1, (random())
+   ->  Foreign Scan on public.ft2
+         Output: c1, c2, c3, c4, c5, c6, c7, c8, random()
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(6 rows)
+
+EXPLAIN (VERBOSE, COSTS false)
+	SELECT * FROM ft2 ORDER BY ft2.c1, ft2.c3 collate "C";
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Sort
+   Output: c1, c2, c3, c4, c5, c6, c7, c8, ((c3)::text)
+   Sort Key: ft2.c1, ft2.c3 COLLATE "C"
+   ->  Foreign Scan on public.ft2
+         Output: c1, c2, c3, c4, c5, c6, c7, c8, c3
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(6 rows)
+
 -- ===================================================================
 -- parameterized queries
 -- ===================================================================
 -- simple join
 PREPARE st1(int, int) AS SELECT t1.c3, t2.c3 FROM ft1 t1, ft2 t2 WHERE t1.c1 = $1 AND t2.c1 = $2;
 EXPLAIN (VERBOSE, COSTS false) EXECUTE st1(1, 2);
                              QUERY PLAN                             
 --------------------------------------------------------------------
  Nested Loop
    Output: t1.c3, t2.c3
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 1902f1f..914e670 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -40,20 +40,23 @@
 #include "utils/sampling.h"
 
 PG_MODULE_MAGIC;
 
 /* Default CPU cost to start up a foreign query. */
 #define DEFAULT_FDW_STARTUP_COST	100.0
 
 /* Default CPU cost to process 1 row (above and beyond cpu_tuple_cost). */
 #define DEFAULT_FDW_TUPLE_COST		0.01
 
+/* If no remote estimates, assume a sort costs 20% extra */
+#define DEFAULT_FDW_SORT_MULTIPLIER 1.2
+
 /*
  * FDW-specific planner information kept in RelOptInfo.fdw_private for a
  * foreign table.  This information is collected by postgresGetForeignRelSize.
  */
 typedef struct PgFdwRelationInfo
 {
 	/* baserestrictinfo clauses, broken down into safe and unsafe subsets. */
 	List	   *remote_conds;
 	List	   *local_conds;
 
@@ -289,20 +292,21 @@ static bool postgresAnalyzeForeignTable(Relation relation,
 							BlockNumber *totalpages);
 static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt,
 							Oid serverOid);
 
 /*
  * Helper functions
  */
 static void estimate_path_cost_size(PlannerInfo *root,
 						RelOptInfo *baserel,
 						List *join_conds,
+						List *pathkeys,
 						double *p_rows, int *p_width,
 						Cost *p_startup_cost, Cost *p_total_cost);
 static void get_remote_estimate(const char *sql,
 					PGconn *conn,
 					double *rows,
 					int *width,
 					Cost *startup_cost,
 					Cost *total_cost);
 static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
 						  EquivalenceClass *ec, EquivalenceMember *em,
@@ -490,21 +494,21 @@ postgresGetForeignRelSize(PlannerInfo *root,
 	 * average row width.  Otherwise, estimate using whatever statistics we
 	 * have locally, in a way similar to ordinary tables.
 	 */
 	if (fpinfo->use_remote_estimate)
 	{
 		/*
 		 * Get cost/size estimates with help of remote server.  Save the
 		 * values in fpinfo so we don't need to do it again to generate the
 		 * basic foreign path.
 		 */
-		estimate_path_cost_size(root, baserel, NIL,
+		estimate_path_cost_size(root, baserel, NIL, NIL,
 								&fpinfo->rows, &fpinfo->width,
 								&fpinfo->startup_cost, &fpinfo->total_cost);
 
 		/* Report estimated baserel size to planner. */
 		baserel->rows = fpinfo->rows;
 		baserel->width = fpinfo->width;
 	}
 	else
 	{
 		/*
@@ -520,57 +524,112 @@ postgresGetForeignRelSize(PlannerInfo *root,
 		{
 			baserel->pages = 10;
 			baserel->tuples =
 				(10 * BLCKSZ) / (baserel->width + MAXALIGN(SizeofHeapTupleHeader));
 		}
 
 		/* Estimate baserel size as best we can with local statistics. */
 		set_baserel_size_estimates(root, baserel);
 
 		/* Fill in basically-bogus cost estimates for use later. */
-		estimate_path_cost_size(root, baserel, NIL,
+		estimate_path_cost_size(root, baserel, NIL, NIL,
 								&fpinfo->rows, &fpinfo->width,
 								&fpinfo->startup_cost, &fpinfo->total_cost);
 	}
 }
 
 /*
  * postgresGetForeignPaths
  *		Create possible scan paths for a scan on the foreign table
  */
 static void
 postgresGetForeignPaths(PlannerInfo *root,
 						RelOptInfo *baserel,
 						Oid foreigntableid)
 {
 	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
 	ForeignPath *path;
 	List	   *ppi_list;
 	ListCell   *lc;
+	List	   *usable_pathkeys = NIL;
 
 	/*
 	 * Create simplest ForeignScan path node and add it to baserel.  This path
 	 * corresponds to SeqScan path of regular tables (though depending on what
 	 * baserestrict conditions we were able to send to remote, there might
 	 * actually be an indexscan happening there).  We already did all the work
 	 * to estimate cost and size of this path.
 	 */
 	path = create_foreignscan_path(root, baserel,
 								   fpinfo->rows,
 								   fpinfo->startup_cost,
 								   fpinfo->total_cost,
 								   NIL, /* no pathkeys */
 								   NULL,		/* no outer rel either */
 								   NIL);		/* no fdw_private list */
 	add_path(baserel, (Path *) path);
 
 	/*
+	 * Determine whether we can potentially push query pathkeys to the remote
+	 * side, avoiding a local sort.
+	 */
+	foreach(lc, root->query_pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+		Expr	   *em_expr;
+
+		/*
+		 * is_foreign_expr would detect volatile expressions as well, but
+		 * ec_has_volatile saves some cycles.
+		 */
+		if (!pathkey_ec->ec_has_volatile &&
+			(em_expr = find_em_expr_for_rel(pathkey_ec, baserel)) &&
+			is_foreign_expr(root, baserel, em_expr))
+			usable_pathkeys = lappend(usable_pathkeys, pathkey);
+		else
+		{
+			/*
+			 * The planner and executor don't have any clever strategy for
+			 * taking data sorted by a prefix of the query's pathkeys and
+			 * getting it to be sorted by all of those pathekeys.  We'll just
+			 * end up resorting the entire data set.  So, unless we can push
+			 * down all of the query pathkeys, forget it.
+			 */
+			list_free(usable_pathkeys);
+			usable_pathkeys = NIL;
+			break;
+		}
+	}
+
+	/* Create a path with useful pathkeys, if we found one. */
+	if (usable_pathkeys != NULL)
+	{
+		double		rows;
+		int			width;
+		Cost		startup_cost;
+		Cost		total_cost;
+
+		estimate_path_cost_size(root, baserel, NIL, usable_pathkeys,
+								&rows, &width, &startup_cost, &total_cost);
+
+		add_path(baserel, (Path *)
+				 create_foreignscan_path(root, baserel,
+										 rows,
+										 startup_cost,
+										 total_cost,
+										 usable_pathkeys,
+										 NULL,
+										 NIL));
+	}
+
+	/*
 	 * If we're not using remote estimates, stop here.  We have no way to
 	 * estimate whether any join clauses would be worth sending across, so
 	 * don't bother building parameterized paths.
 	 */
 	if (!fpinfo->use_remote_estimate)
 		return;
 
 	/*
 	 * Thumb through all join clauses for the rel to identify which outer
 	 * relations could supply one or more safe-to-send-to-remote join clauses.
@@ -703,21 +762,21 @@ postgresGetForeignPaths(PlannerInfo *root,
 	foreach(lc, ppi_list)
 	{
 		ParamPathInfo *param_info = (ParamPathInfo *) lfirst(lc);
 		double		rows;
 		int			width;
 		Cost		startup_cost;
 		Cost		total_cost;
 
 		/* Get a cost estimate from the remote */
 		estimate_path_cost_size(root, baserel,
-								param_info->ppi_clauses,
+								param_info->ppi_clauses, NIL,
 								&rows, &width,
 								&startup_cost, &total_cost);
 
 		/*
 		 * ppi_rows currently won't get looked at by anything, but still we
 		 * may as well ensure that it matches our idea of the rowcount.
 		 */
 		param_info->ppi_rows = rows;
 
 		/* Make the path */
@@ -804,20 +863,24 @@ postgresGetForeignPlan(PlannerInfo *root,
 	 * Build the query string to be sent for execution, and identify
 	 * expressions to be sent as parameters.
 	 */
 	initStringInfo(&sql);
 	deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used,
 					 &retrieved_attrs);
 	if (remote_conds)
 		appendWhereClause(&sql, root, baserel, remote_conds,
 						  true, &params_list);
 
+	/* Add ORDER BY clause if we found any useful pathkeys */
+	if (best_path->path.pathkeys)
+		appendOrderByClause(&sql, root, baserel, best_path->path.pathkeys);
+
 	/*
 	 * Add FOR UPDATE/SHARE if appropriate.  We apply locking during the
 	 * initial row fetch, rather than later on as is done for local tables.
 	 * The extra roundtrips involved in trying to duplicate the local
 	 * semantics exactly don't seem worthwhile (see also comments for
 	 * RowMarkType).
 	 *
 	 * Note: because we actually run the query as a cursor, this assumes that
 	 * DECLARE CURSOR ... FOR UPDATE is supported, which it isn't before 8.3.
 	 */
@@ -1721,20 +1784,21 @@ postgresExplainForeignModify(ModifyTableState *mtstate,
  * estimate_path_cost_size
  *		Get cost and size estimates for a foreign scan
  *
  * We assume that all the baserestrictinfo clauses will be applied, plus
  * any join clauses listed in join_conds.
  */
 static void
 estimate_path_cost_size(PlannerInfo *root,
 						RelOptInfo *baserel,
 						List *join_conds,
+						List *pathkeys,
 						double *p_rows, int *p_width,
 						Cost *p_startup_cost, Cost *p_total_cost)
 {
 	PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private;
 	double		rows;
 	double		retrieved_rows;
 	int			width;
 	Cost		startup_cost;
 	Cost		total_cost;
 	Cost		run_cost;
@@ -1773,20 +1837,23 @@ estimate_path_cost_size(PlannerInfo *root,
 		appendStringInfoString(&sql, "EXPLAIN ");
 		deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used,
 						 &retrieved_attrs);
 		if (fpinfo->remote_conds)
 			appendWhereClause(&sql, root, baserel, fpinfo->remote_conds,
 							  true, NULL);
 		if (remote_join_conds)
 			appendWhereClause(&sql, root, baserel, remote_join_conds,
 							  (fpinfo->remote_conds == NIL), NULL);
 
+		if (pathkeys)
+			appendOrderByClause(&sql, root, baserel, pathkeys);
+
 		/* Get the remote estimate */
 		conn = GetConnection(fpinfo->server, fpinfo->user, false);
 		get_remote_estimate(sql.data, conn, &rows, &width,
 							&startup_cost, &total_cost);
 		ReleaseConnection(conn);
 
 		retrieved_rows = rows;
 
 		/* Factor in the selectivity of the locally-checked quals */
 		local_sel = clauselist_selectivity(root,
@@ -1830,20 +1897,35 @@ estimate_path_cost_size(PlannerInfo *root,
 		 * too.
 		 */
 		startup_cost = 0;
 		run_cost = 0;
 		run_cost += seq_page_cost * baserel->pages;
 
 		startup_cost += baserel->baserestrictcost.startup;
 		cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 		run_cost += cpu_per_tuple * baserel->tuples;
 
+		/*
+		 * Without remote estimates, we have no real way to estimate the cost
+		 * of generating sorted output.  It could be free if the query plan
+		 * the remote side would have chosen generates properly-sorted output
+		 * anyway, but in most cases it will cost something.  Estimate a value
+		 * high enough that we won't pick the sorted path when the ordering
+		 * isn't locally useful, but low enough that we'll err on the side of
+		 * pushing down the ORDER BY clause when it's useful to do so.
+		 */
+		if (pathkeys != NIL)
+		{
+			startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+			run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+		}
+
 		total_cost = startup_cost + run_cost;
 	}
 
 	/*
 	 * Add some additional cost factors to account for connection overhead
 	 * (fdw_startup_cost), transferring data across the network
 	 * (fdw_tuple_cost per retrieved row), and local manipulation of the data
 	 * (cpu_tuple_cost per retrieved row).
 	 */
 	startup_cost += fpinfo->fdw_startup_cost;
@@ -2995,10 +3077,38 @@ static void
 conversion_error_callback(void *arg)
 {
 	ConversionLocation *errpos = (ConversionLocation *) arg;
 	TupleDesc	tupdesc = RelationGetDescr(errpos->rel);
 
 	if (errpos->cur_attno > 0 && errpos->cur_attno <= tupdesc->natts)
 		errcontext("column \"%s\" of foreign table \"%s\"",
 				   NameStr(tupdesc->attrs[errpos->cur_attno - 1]->attname),
 				   RelationGetRelationName(errpos->rel));
 }
+
+/*
+ * Find an equivalence class member expression, all of whose Vars, come from
+ * the indicated relation.
+ */
+extern Expr *
+find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
+{
+	ListCell   *lc_em;
+
+	foreach(lc_em, ec->ec_members)
+	{
+		EquivalenceMember *em = lfirst(lc_em);
+
+		if (bms_equal(em->em_relids, rel->relids))
+		{
+			/*
+			 * If there is more than one equivalence member whose Vars are
+			 * taken entirely from this relation, we'll be content to choose
+			 * any one of those.
+			 */
+			return em->em_expr;
+		}
+	}
+
+	/* We didn't find any suitable equivalence class expression */
+	return NULL;
+}
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 3835ddb..8956cd2 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -67,12 +67,15 @@ extern void deparseUpdateSql(StringInfo buf, PlannerInfo *root,
 				 List *targetAttrs, List *returningList,
 				 List **retrieved_attrs);
 extern void deparseDeleteSql(StringInfo buf, PlannerInfo *root,
 				 Index rtindex, Relation rel,
 				 List *returningList,
 				 List **retrieved_attrs);
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 				  List **retrieved_attrs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
+extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern void appendOrderByClause(StringInfo buf, PlannerInfo *root,
+								RelOptInfo *baserel, List *pathkeys);
 
 #endif   /* POSTGRES_FDW_H */
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 11160f8..a87a63f 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -131,25 +131,26 @@ ALTER FOREIGN TABLE ft2 ALTER COLUMN c1 OPTIONS (column_name 'C 1');
 
 -- Now we should be able to run ANALYZE.
 -- To exercise multiple code paths, we use local stats on ft1
 -- and remote-estimate mode on ft2.
 ANALYZE ft1;
 ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
 
 -- ===================================================================
 -- simple queries
 -- ===================================================================
--- single table, with/without alias
+-- single table without alias
 EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
 SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
-EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+-- single table with alias - also test that tableoid sort is not pushed to remote side
+EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
+SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
 -- whole-row reference
 EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
 SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
 -- empty result
 SELECT * FROM ft1 WHERE false;
 -- with WHERE clause
 EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE t1.c1 = 101 AND t1.c6 = '1' AND t1.c7 >= '1';
 SELECT * FROM ft1 t1 WHERE t1.c1 = 101 AND t1.c6 = '1' AND t1.c7 >= '1';
 -- with FOR UPDATE/SHARE
 EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c1 = 101 FOR UPDATE;
@@ -207,20 +208,26 @@ EXPLAIN (VERBOSE, COSTS false)
 SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2;
 -- check both safe and unsafe join conditions
 EXPLAIN (VERBOSE, COSTS false)
   SELECT * FROM ft2 a, ft2 b
   WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7);
 SELECT * FROM ft2 a, ft2 b
 WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7);
 -- bug before 9.3.5 due to sloppy handling of remote-estimate parameters
 SELECT * FROM ft1 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft2 WHERE c1 < 5));
 SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
+-- we should not push order by clause with volatile expressions or unsafe
+-- collations
+EXPLAIN (VERBOSE, COSTS false)
+	SELECT * FROM ft2 ORDER BY ft2.c1, random();
+EXPLAIN (VERBOSE, COSTS false)
+	SELECT * FROM ft2 ORDER BY ft2.c1, ft2.c3 collate "C";
 
 -- ===================================================================
 -- parameterized queries
 -- ===================================================================
 -- simple join
 PREPARE st1(int, int) AS SELECT t1.c3, t2.c3 FROM ft1 t1, ft2 t2 WHERE t1.c1 = $1 AND t2.c1 = $2;
 EXPLAIN (VERBOSE, COSTS false) EXECUTE st1(1, 2);
 EXECUTE st1(1, 1);
 EXECUTE st1(101, 101);
 -- subquery using stable function (can't be sent to remote)
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to