On Thu, Oct 15, 2015 at 6:28 AM, Ashutosh Bapat
<ashutosh.ba...@enterprisedb.com> wrote:
> Attached is the patch which takes care of above comments.

I spent some time on this patch today.  But it's still not right.

I've attached a new version which fixes a serious problem with your
last version - having postgresGetForeignPaths do the costing of the
sorted path itself instead of delegating that to
estimate_path_cost_size is wrong.  In your version, 10% increment gets
applied to the network transmission costs as well as the cost of
generating the tupes - but only when use_remote_estimate == false.  I
fixed this and did some cosmetic cleanup.

But you'll notice if you try this some of postgres_fdw's regression
tests fail.  This is rather mysterious:

***************
*** 697,715 ****
   Sort
     Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
     Sort Key: t1.c1
!    ->  Nested Loop Semi Join
           Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
!          Join Filter: (t1.c3 = t2.c3)
           ->  Foreign Scan on public.ft1 t1
                 Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
                 Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8
FROM "S 1"."T 1" WHERE (("C 1" < 20))
!          ->  Materialize
                 Output: t2.c3
!                ->  Foreign Scan on public.ft2 t2
                       Output: t2.c3
!                      Filter: (date(t2.c4) = '01-17-1970'::date)
!                      Remote SQL: SELECT c3, c4 FROM "S 1"."T 1"
WHERE (("C 1" > 10))
! (15 rows)

  EXECUTE st2(10, 20);
   c1 | c2 |  c3   |              c4              |            c5
      | c6 |     c7     | c8
--- 697,718 ----
   Sort
     Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
     Sort Key: t1.c1
!    ->  Hash Join
           Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
!          Hash Cond: (t1.c3 = t2.c3)
           ->  Foreign Scan on public.ft1 t1
                 Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
                 Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8
FROM "S 1"."T 1" WHERE (("C 1" < 20))
!          ->  Hash
                 Output: t2.c3
!                ->  HashAggregate
                       Output: t2.c3
!                      Group Key: t2.c3
!                      ->  Foreign Scan on public.ft2 t2
!                            Output: t2.c3
!                            Filter: (date(t2.c4) = '01-17-1970'::date)
!                            Remote SQL: SELECT c3, c4 FROM "S 1"."T
1" WHERE (("C 1" > 10))
! (18 rows)

What I think is happening here is that the planner notices that
instead of doing a parameterized nestloop, it could pull down the data
already sorted from the remote side, cheaply unique-ify it by using
the ordering provided by the remote side, and then do a standard hash
join.  That might well be a sensible approach, but the ORDER BY that
would make it correct doesn't show up in the Remote SQL.  I don't know
why that's happening, but it's not good.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 697de60..cb5c3ae 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -193,9 +193,12 @@ is_foreign_expr(PlannerInfo *root,
 	if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt))
 		return false;
 
-	/* Expressions examined here should be boolean, ie noncollatable */
-	Assert(loc_cxt.collation == InvalidOid);
-	Assert(loc_cxt.state == FDW_COLLATE_NONE);
+	/*
+	 * The collation of the expression should be none or originate from a
+	 * foreign var.
+	 */
+	Assert(loc_cxt.state == FDW_COLLATE_NONE ||
+			loc_cxt.state == FDW_COLLATE_SAFE);
 
 	/*
 	 * An expression which includes any mutable functions can't be sent over
@@ -1877,3 +1880,50 @@ printRemotePlaceholder(Oid paramtype, int32 paramtypmod,
 
 	appendStringInfo(buf, "((SELECT null::%s)::%s)", ptypename, ptypename);
 }
+
+/*
+ * Deparse ORDER BY clause according to the given pathkeys for given base
+ * relation. From given pathkeys expressions belonging entirely to the given
+ * base relation are obtained and deparsed.
+ */
+void
+appendOrderByClause(StringInfo buf, PlannerInfo *root, RelOptInfo *baserel,
+					List *pathkeys)
+{
+	ListCell			*lcell;
+	deparse_expr_cxt	context;
+	int					nestlevel;
+	char				*delim = " ";
+
+	/* Set up context struct for recursion */
+	context.root = root;
+	context.foreignrel = baserel;
+	context.buf = buf;
+	context.params_list = NULL;
+
+	/* Make sure any constants in the exprs are printed portably */
+	nestlevel = set_transmission_modes();
+
+	appendStringInfo(buf, " ORDER BY");
+	foreach(lcell, pathkeys)
+	{
+		PathKey				*pathkey = lfirst(lcell);
+		Expr				*em_expr;
+
+		em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel);
+		Assert(em_expr != NULL);
+
+		appendStringInfoString(buf, delim);
+		deparseExpr(em_expr, &context);
+		if (pathkey->pk_strategy == BTLessStrategyNumber)
+			appendStringInfoString(buf, " ASC");
+		else
+			appendStringInfoString(buf, " DESC");
+
+		if (pathkey->pk_nulls_first)
+			appendStringInfoString(buf, " NULLS FIRST");
+
+		delim = ", ";
+	}
+	reset_transmission_modes(nestlevel);
+}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 65ea6e8..58bf76c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -134,15 +134,13 @@ ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
 -- ===================================================================
 -- simple queries
 -- ===================================================================
--- single table, with/without alias
+-- single table without alias
 EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
-           QUERY PLAN            
----------------------------------
+        QUERY PLAN         
+---------------------------
  Limit
-   ->  Sort
-         Sort Key: c3, c1
-         ->  Foreign Scan on ft1
-(4 rows)
+   ->  Foreign Scan on ft1
+(2 rows)
 
 SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
  c1  | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  
@@ -159,20 +157,21 @@ SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
  110 |  0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0  | 0          | foo
 (10 rows)
 
-EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+-- single table with alias - also test that tableoid sort is not pushed to remote side
+EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
                                      QUERY PLAN                                      
 -------------------------------------------------------------------------------------
  Limit
-   Output: c1, c2, c3, c4, c5, c6, c7, c8
+   Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
    ->  Sort
-         Output: c1, c2, c3, c4, c5, c6, c7, c8
-         Sort Key: t1.c3, t1.c1
+         Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
+         Sort Key: t1.c3, t1.c1, t1.tableoid
          ->  Foreign Scan on public.ft1 t1
-               Output: c1, c2, c3, c4, c5, c6, c7, c8
+               Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
                Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
 (8 rows)
 
-SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
  c1  | c2 |  c3   |              c4              |            c5            | c6 |     c7     | c8  
 -----+----+-------+------------------------------+--------------------------+----+------------+-----
  101 |  1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1  | 1          | foo
@@ -189,17 +188,14 @@ SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
 
 -- whole-row reference
 EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-                                     QUERY PLAN                                      
--------------------------------------------------------------------------------------
+                                                QUERY PLAN                                                
+----------------------------------------------------------------------------------------------------------
  Limit
    Output: t1.*, c3, c1
-   ->  Sort
+   ->  Foreign Scan on public.ft1 t1
          Output: t1.*, c3, c1
-         Sort Key: t1.c3, t1.c1
-         ->  Foreign Scan on public.ft1 t1
-               Output: t1.*, c3, c1
-               Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
-(8 rows)
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c3 ASC, "C 1" ASC
+(5 rows)
 
 SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
                                              t1                                             
@@ -650,6 +646,19 @@ SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
   4 |  4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4  | 4          | foo
 (4 rows)
 
+-- we should not push order by clause with volatile expressions
+EXPLAIN (VERBOSE, COSTS false)
+	SELECT * FROM ft2 ORDER BY ft2.c1, random();
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Sort
+   Output: c1, c2, c3, c4, c5, c6, c7, c8, (random())
+   Sort Key: ft2.c1, (random())
+   ->  Foreign Scan on public.ft2
+         Output: c1, c2, c3, c4, c5, c6, c7, c8, random()
+         Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
+(6 rows)
+
 -- ===================================================================
 -- parameterized queries
 -- ===================================================================
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 1902f1f..f4d709d 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -47,6 +47,9 @@ PG_MODULE_MAGIC;
 /* Default CPU cost to process 1 row (above and beyond cpu_tuple_cost). */
 #define DEFAULT_FDW_TUPLE_COST		0.01
 
+/* If no remote estimates, assume a sort costs 10% extra */
+#define DEFAULT_FDW_SORT_MULTIPLIER 1.1
+
 /*
  * FDW-specific planner information kept in RelOptInfo.fdw_private for a
  * foreign table.  This information is collected by postgresGetForeignRelSize.
@@ -296,6 +299,7 @@ static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt,
 static void estimate_path_cost_size(PlannerInfo *root,
 						RelOptInfo *baserel,
 						List *join_conds,
+						List *pathkeys,
 						double *p_rows, int *p_width,
 						Cost *p_startup_cost, Cost *p_total_cost);
 static void get_remote_estimate(const char *sql,
@@ -497,7 +501,7 @@ postgresGetForeignRelSize(PlannerInfo *root,
 		 * values in fpinfo so we don't need to do it again to generate the
 		 * basic foreign path.
 		 */
-		estimate_path_cost_size(root, baserel, NIL,
+		estimate_path_cost_size(root, baserel, NIL, NIL,
 								&fpinfo->rows, &fpinfo->width,
 								&fpinfo->startup_cost, &fpinfo->total_cost);
 
@@ -527,7 +531,7 @@ postgresGetForeignRelSize(PlannerInfo *root,
 		set_baserel_size_estimates(root, baserel);
 
 		/* Fill in basically-bogus cost estimates for use later. */
-		estimate_path_cost_size(root, baserel, NIL,
+		estimate_path_cost_size(root, baserel, NIL, NIL,
 								&fpinfo->rows, &fpinfo->width,
 								&fpinfo->startup_cost, &fpinfo->total_cost);
 	}
@@ -546,6 +550,7 @@ postgresGetForeignPaths(PlannerInfo *root,
 	ForeignPath *path;
 	List	   *ppi_list;
 	ListCell   *lc;
+	List	   *usable_pathkeys = NIL;
 
 	/*
 	 * Create simplest ForeignScan path node and add it to baserel.  This path
@@ -564,6 +569,60 @@ postgresGetForeignPaths(PlannerInfo *root,
 	add_path(baserel, (Path *) path);
 
 	/*
+	 * Determine whether we can potentially push query pathkeys to the remote
+	 * side, avoiding a local sort.
+	 */
+	foreach(lc, root->query_pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+		Expr	   *em_expr;
+
+		/*
+		 * is_foreign_expr would detect volatile expressions as well, but
+		 * ec_has_volatile saves some cycles.
+		 */
+		if (!pathkey_ec->ec_has_volatile &&
+			(em_expr = find_em_expr_for_rel(pathkey_ec, baserel)) &&
+			is_foreign_expr(root, baserel, em_expr))
+			usable_pathkeys = lappend(usable_pathkeys, pathkey);
+		else
+		{
+			/*
+			 * The planner and executor don't have any clever strategy for
+			 * taking data sorted by a prefix of the query's pathkeys and
+			 * getting it to be sorted by all of those pathekeys.  We'll just
+			 * end up resorting the entire data set.  So, unless we can push
+			 * down all of the query pathkeys, forget it.
+			 */
+			list_free(usable_pathkeys);
+			usable_pathkeys = NIL;
+			break;
+		}
+	}
+
+	/* Create a path with useful pathkeys, if we found one. */
+	if (usable_pathkeys != NULL)
+	{
+		double		rows;
+		int			width;
+		Cost		startup_cost;
+		Cost		total_cost;
+
+		estimate_path_cost_size(root, baserel, NIL, usable_pathkeys,
+								&rows, &width, &startup_cost, &total_cost);
+
+		add_path(baserel, (Path *)
+				 create_foreignscan_path(root, baserel,
+										 rows,
+										 startup_cost,
+										 total_cost,
+										 usable_pathkeys,
+										 NULL,
+										 NIL));
+	}
+
+	/*
 	 * If we're not using remote estimates, stop here.  We have no way to
 	 * estimate whether any join clauses would be worth sending across, so
 	 * don't bother building parameterized paths.
@@ -710,7 +769,7 @@ postgresGetForeignPaths(PlannerInfo *root,
 
 		/* Get a cost estimate from the remote */
 		estimate_path_cost_size(root, baserel,
-								param_info->ppi_clauses,
+								param_info->ppi_clauses, NIL,
 								&rows, &width,
 								&startup_cost, &total_cost);
 
@@ -811,6 +870,10 @@ postgresGetForeignPlan(PlannerInfo *root,
 		appendWhereClause(&sql, root, baserel, remote_conds,
 						  true, &params_list);
 
+	/* Add ORDER BY clause if we found any useful pathkeys */
+	if (best_path->path.pathkeys)
+		appendOrderByClause(&sql, root, baserel, best_path->path.pathkeys);
+
 	/*
 	 * Add FOR UPDATE/SHARE if appropriate.  We apply locking during the
 	 * initial row fetch, rather than later on as is done for local tables.
@@ -1728,6 +1791,7 @@ static void
 estimate_path_cost_size(PlannerInfo *root,
 						RelOptInfo *baserel,
 						List *join_conds,
+						List *pathkeys,
 						double *p_rows, int *p_width,
 						Cost *p_startup_cost, Cost *p_total_cost)
 {
@@ -1780,6 +1844,9 @@ estimate_path_cost_size(PlannerInfo *root,
 			appendWhereClause(&sql, root, baserel, remote_join_conds,
 							  (fpinfo->remote_conds == NIL), NULL);
 
+		if (pathkeys)
+			appendOrderByClause(&sql, root, baserel, pathkeys);
+
 		/* Get the remote estimate */
 		conn = GetConnection(fpinfo->server, fpinfo->user, false);
 		get_remote_estimate(sql.data, conn, &rows, &width,
@@ -1837,6 +1904,21 @@ estimate_path_cost_size(PlannerInfo *root,
 		cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 		run_cost += cpu_per_tuple * baserel->tuples;
 
+		/*
+		 * Without remote estimates, we have no real way to estimate the cost
+		 * of generating sorted output.  It could be free if the query plan
+		 * the remote side would have chosen generates properly-sorted output
+		 * anyway, but in most cases it will cost something.  Estimate a value
+		 * high enough that we won't pick the sorted path when the ordering
+		 * isn't locally useful, but low enough that we'll err on the sidea of
+		 * pushing down the ORDER BY clause when it's useful to do so.
+		 */
+		if (pathkeys != NIL)
+		{
+			startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+			run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+		}
+
 		total_cost = startup_cost + run_cost;
 	}
 
@@ -3002,3 +3084,31 @@ conversion_error_callback(void *arg)
 				   NameStr(tupdesc->attrs[errpos->cur_attno - 1]->attname),
 				   RelationGetRelationName(errpos->rel));
 }
+
+/*
+ * Find an equivalence class member expression all of whose Vars come from
+ * the indicated relation.
+ */
+extern Expr *
+find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
+{
+	ListCell   *lc_em;
+
+	foreach(lc_em, ec->ec_members)
+	{
+		EquivalenceMember *em = lfirst(lc_em);
+
+		if (bms_equal(em->em_relids, rel->relids))
+		{
+			/*
+			 * If there is more than one equivalence member whose Vars are
+			 * taken entirely from this relation, we'll be content to choose
+			 * any one of those.
+			 */
+			return em->em_expr;
+		}
+	}
+
+	/* We didn't find any suitable equivalence class expression */
+	return NULL;
+}
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 3835ddb..8956cd2 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -74,5 +74,8 @@ extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 				  List **retrieved_attrs);
 extern void deparseStringLiteral(StringInfo buf, const char *val);
+extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern void appendOrderByClause(StringInfo buf, PlannerInfo *root,
+								RelOptInfo *baserel, List *pathkeys);
 
 #endif   /* POSTGRES_FDW_H */
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 11160f8..861464c 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -138,11 +138,12 @@ ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
 -- ===================================================================
 -- simple queries
 -- ===================================================================
--- single table, with/without alias
+-- single table without alias
 EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
 SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
-EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
+-- single table with alias - also test that tableoid sort is not pushed to remote side
+EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
+SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
 -- whole-row reference
 EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
 SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
@@ -214,6 +215,9 @@ WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7);
 -- bug before 9.3.5 due to sloppy handling of remote-estimate parameters
 SELECT * FROM ft1 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft2 WHERE c1 < 5));
 SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
+-- we should not push order by clause with volatile expressions
+EXPLAIN (VERBOSE, COSTS false)
+	SELECT * FROM ft2 ORDER BY ft2.c1, random();
 
 -- ===================================================================
 -- parameterized queries
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to