On 2016/12/20 0:37, Tom Lane wrote:
Etsuro Fujita <fujita.ets...@lab.ntt.co.jp> writes:
On 2016/12/17 1:13, Tom Lane wrote:
So I think the rule could be

"When first asked to produce a path for a given foreign joinrel, collect
the cheapest paths for its left and right inputs, and make a nestloop path
(or hashjoin path, if full join) from those, using the join quals needed
for the current input relation pair.

Seems reasonable.

Use this as the fdw_outerpath for
all foreign paths made for the joinrel."

I'm not sure that would work well for foreign joins with sort orders.
Consider a merge join, whose left input is a 2-way foreign join with a
sort order that implements a full join and whose right input is a sorted
local table scan.  If the EPQ subplan for the foreign join wouldn't
produce the right sort order, the merge join might break during EPQ
rechecks (note that in this case the EPQ subplan for the foreign join
might produce more than a single row during an EPQ recheck).

How so?  We only recheck one row at a time, therefore it can be claimed to
have any sort order you care about.

I'll have second thoughts about that. I agree with you except for that, so I've created a patch; I removed GetExistingLocalJoinPath and added a helper function, CreateLocalJoinPath, that generates a local join path for a given foreign join, as described above. Please find attached a patch.

Best regards,
Etsuro Fujita
*** a/contrib/postgres_fdw/expected/postgres_fdw.out
--- b/contrib/postgres_fdw/expected/postgres_fdw.out
***************
*** 1519,1540 **** SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
                 Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
                 Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
                 Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1
!                ->  Merge Join
                       Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
!                      Merge Cond: (t1.c1 = t2.c1)
!                      ->  Sort
                             Output: t1.c1, t1.c3, t1.*
!                            Sort Key: t1.c1
!                            ->  Foreign Scan on public.ft1 t1
!                                  Output: t1.c1, t1.c3, t1.*
!                                  Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
!                      ->  Sort
                             Output: t2.c1, t2.*
!                            Sort Key: t2.c1
!                            ->  Foreign Scan on public.ft2 t2
!                                  Output: t2.c1, t2.*
!                                  Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
! (23 rows)
  
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
   c1  | c1  
--- 1519,1534 ----
                 Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
                 Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
                 Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1
!                ->  Nested Loop
                       Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
!                      Join Filter: (t1.c1 = t2.c1)
!                      ->  Foreign Scan on public.ft1 t1
                             Output: t1.c1, t1.c3, t1.*
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
!                      ->  Foreign Scan on public.ft2 t2
                             Output: t2.c1, t2.*
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
! (17 rows)
  
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE OF t1;
   c1  | c1  
***************
*** 1563,1584 **** SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
                 Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
                 Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
                 Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2
!                ->  Merge Join
                       Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
!                      Merge Cond: (t1.c1 = t2.c1)
!                      ->  Sort
                             Output: t1.c1, t1.c3, t1.*
!                            Sort Key: t1.c1
!                            ->  Foreign Scan on public.ft1 t1
!                                  Output: t1.c1, t1.c3, t1.*
!                                  Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
!                      ->  Sort
                             Output: t2.c1, t2.*
!                            Sort Key: t2.c1
!                            ->  Foreign Scan on public.ft2 t2
!                                  Output: t2.c1, t2.*
!                                  Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
! (23 rows)
  
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
   c1  | c1  
--- 1557,1572 ----
                 Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
                 Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
                 Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR UPDATE OF r1 FOR UPDATE OF r2
!                ->  Nested Loop
                       Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
!                      Join Filter: (t1.c1 = t2.c1)
!                      ->  Foreign Scan on public.ft1 t1
                             Output: t1.c1, t1.c3, t1.*
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
!                      ->  Foreign Scan on public.ft2 t2
                             Output: t2.c1, t2.*
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR UPDATE
! (17 rows)
  
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR UPDATE;
   c1  | c1  
***************
*** 1608,1629 **** SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
                 Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
                 Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
                 Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1
!                ->  Merge Join
                       Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
!                      Merge Cond: (t1.c1 = t2.c1)
!                      ->  Sort
                             Output: t1.c1, t1.c3, t1.*
!                            Sort Key: t1.c1
!                            ->  Foreign Scan on public.ft1 t1
!                                  Output: t1.c1, t1.c3, t1.*
!                                  Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
!                      ->  Sort
                             Output: t2.c1, t2.*
!                            Sort Key: t2.c1
!                            ->  Foreign Scan on public.ft2 t2
!                                  Output: t2.c1, t2.*
!                                  Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
! (23 rows)
  
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
   c1  | c1  
--- 1596,1611 ----
                 Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
                 Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
                 Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1
!                ->  Nested Loop
                       Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
!                      Join Filter: (t1.c1 = t2.c1)
!                      ->  Foreign Scan on public.ft1 t1
                             Output: t1.c1, t1.c3, t1.*
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
!                      ->  Foreign Scan on public.ft2 t2
                             Output: t2.c1, t2.*
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
! (17 rows)
  
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE OF t1;
   c1  | c1  
***************
*** 1652,1673 **** SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
                 Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
                 Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
                 Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1 FOR SHARE OF r2
!                ->  Merge Join
                       Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
!                      Merge Cond: (t1.c1 = t2.c1)
!                      ->  Sort
                             Output: t1.c1, t1.c3, t1.*
!                            Sort Key: t1.c1
!                            ->  Foreign Scan on public.ft1 t1
!                                  Output: t1.c1, t1.c3, t1.*
!                                  Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
!                      ->  Sort
                             Output: t2.c1, t2.*
!                            Sort Key: t2.c1
!                            ->  Foreign Scan on public.ft2 t2
!                                  Output: t2.c1, t2.*
!                                  Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
! (23 rows)
  
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
   c1  | c1  
--- 1634,1649 ----
                 Output: t1.c1, t2.c1, t1.c3, t1.*, t2.*
                 Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2)
                 Remote SQL: SELECT r1."C 1", r1.c3, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2."C 1", CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST FOR SHARE OF r1 FOR SHARE OF r2
!                ->  Nested Loop
                       Output: t1.c1, t1.c3, t1.*, t2.c1, t2.*
!                      Join Filter: (t1.c1 = t2.c1)
!                      ->  Foreign Scan on public.ft1 t1
                             Output: t1.c1, t1.c3, t1.*
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
!                      ->  Foreign Scan on public.ft2 t2
                             Output: t2.c1, t2.*
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" FOR SHARE
! (17 rows)
  
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
   c1  | c1  
***************
*** 1684,1689 **** SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
--- 1660,1726 ----
   110 | 110
  (10 rows)
  
+ -- FOR UPDATE/SHARE in situations where a full join is pushed down
+ EXPLAIN (VERBOSE, COSTS OFF)
+ SELECT t1.c1, t2.c1, t3.c1 FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (ft4 t2 FULL JOIN ft5 t3 ON (t2.c1 = t3.c1)) ON (TRUE) ORDER BY t1.c1, t2.c1, t3.c1 FOR UPDATE OF t1;
+                                                                                                                                                      QUERY PLAN                                                                                                                                                      
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+  LockRows
+    Output: "T 3".c1, t2.c1, t3.c1, "T 3".ctid, t2.*, t3.*
+    ->  Nested Loop
+          Output: "T 3".c1, t2.c1, t3.c1, "T 3".ctid, t2.*, t3.*
+          ->  Foreign Scan
+                Output: t2.c1, t2.*, t3.c1, t3.*
+                Relations: (public.ft4 t2) FULL JOIN (public.ft5 t3)
+                Remote SQL: SELECT r2.c1, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r3.c1, CASE WHEN (r3.*)::text IS NOT NULL THEN ROW(r3.c1, r3.c2, r3.c3) END FROM ("S 1"."T 3" r2 FULL JOIN "S 1"."T 4" r3 ON (((r2.c1 = r3.c1)))) ORDER BY r2.c1 ASC NULLS LAST, r3.c1 ASC NULLS LAST
+                ->  Hash Full Join
+                      Output: t2.c1, t2.*, t3.c1, t3.*
+                      Hash Cond: (t2.c1 = t3.c1)
+                      ->  Foreign Scan on public.ft4 t2
+                            Output: t2.c1, t2.*
+                            Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
+                      ->  Hash
+                            Output: t3.c1, t3.*
+                            ->  Foreign Scan on public.ft5 t3
+                                  Output: t3.c1, t3.*
+                                  Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4"
+          ->  Materialize
+                Output: "T 3".c1, "T 3".ctid
+                ->  Seq Scan on "S 1"."T 3"
+                      Output: "T 3".c1, "T 3".ctid
+                      Filter: ("T 3".c1 = 50)
+ (24 rows)
+ 
+ EXPLAIN (VERBOSE, COSTS OFF)
+ SELECT t1.c1, t2.c1, t3.c1 FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (ft4 t2 FULL JOIN ft5 t3 ON (FALSE)) ON (TRUE) ORDER BY t1.c1, t2.c1, t3.c1 FOR UPDATE OF t1;
+                                                                                                                                                 QUERY PLAN                                                                                                                                                 
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+  LockRows
+    Output: "T 3".c1, t2.c1, t3.c1, "T 3".ctid, t2.*, t3.*
+    ->  Nested Loop
+          Output: "T 3".c1, t2.c1, t3.c1, "T 3".ctid, t2.*, t3.*
+          ->  Foreign Scan
+                Output: t2.c1, t2.*, t3.c1, t3.*
+                Relations: (public.ft4 t2) FULL JOIN (public.ft5 t3)
+                Remote SQL: SELECT r2.c1, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r3.c1, CASE WHEN (r3.*)::text IS NOT NULL THEN ROW(r3.c1, r3.c2, r3.c3) END FROM ("S 1"."T 3" r2 FULL JOIN "S 1"."T 4" r3 ON ((false))) ORDER BY r2.c1 ASC NULLS LAST, r3.c1 ASC NULLS LAST
+                ->  Merge Full Join
+                      Output: t2.c1, t2.*, t3.c1, t3.*
+                      Join Filter: false
+                      ->  Foreign Scan on public.ft4 t2
+                            Output: t2.c1, t2.*
+                            Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
+                      ->  Materialize
+                            Output: t3.c1, t3.*
+                            ->  Foreign Scan on public.ft5 t3
+                                  Output: t3.c1, t3.*
+                                  Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4"
+          ->  Materialize
+                Output: "T 3".c1, "T 3".ctid
+                ->  Seq Scan on "S 1"."T 3"
+                      Output: "T 3".c1, "T 3".ctid
+                      Filter: ("T 3".c1 = 50)
+ (24 rows)
+ 
  -- join in CTE
  EXPLAIN (VERBOSE, COSTS OFF)
  WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
***************
*** 4016,4033 **** UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
           Output: ft2.c1, (ft2.c2 + 500), NULL::integer, (ft2.c3 || '_update9'::text), ft2.c4, ft2.c5, ft2.c6, 'ft2       '::character(10), ft2.c8, ft2.ctid, ft1.*
           Relations: (public.ft2) INNER JOIN (public.ft1)
           Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c8, r1.ctid, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 9)))) FOR UPDATE OF r1
!          ->  Hash Join
                 Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid, ft1.*
!                Hash Cond: (ft2.c2 = ft1.c1)
                 ->  Foreign Scan on public.ft2
                       Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid
                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c8, ctid FROM "S 1"."T 1" FOR UPDATE
!                ->  Hash
                       Output: ft1.*, ft1.c1
!                      ->  Foreign Scan on public.ft1
!                            Output: ft1.*, ft1.c1
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 9))
! (17 rows)
  
  UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
    FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9;
--- 4053,4068 ----
           Output: ft2.c1, (ft2.c2 + 500), NULL::integer, (ft2.c3 || '_update9'::text), ft2.c4, ft2.c5, ft2.c6, 'ft2       '::character(10), ft2.c8, ft2.ctid, ft1.*
           Relations: (public.ft2) INNER JOIN (public.ft1)
           Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c8, r1.ctid, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 9)))) FOR UPDATE OF r1
!          ->  Nested Loop
                 Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid, ft1.*
!                Join Filter: (ft2.c2 = ft1.c1)
                 ->  Foreign Scan on public.ft2
                       Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c8, ft2.ctid
                       Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c8, ctid FROM "S 1"."T 1" FOR UPDATE
!                ->  Foreign Scan on public.ft1
                       Output: ft1.*, ft1.c1
!                      Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 9))
! (15 rows)
  
  UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
    FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9;
***************
*** 4159,4176 **** DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2;
           Output: ft2.ctid, ft1.*
           Relations: (public.ft2) INNER JOIN (public.ft1)
           Remote SQL: SELECT r1.ctid, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 2)))) FOR UPDATE OF r1
!          ->  Hash Join
                 Output: ft2.ctid, ft1.*
!                Hash Cond: (ft2.c2 = ft1.c1)
                 ->  Foreign Scan on public.ft2
                       Output: ft2.ctid, ft2.c2
                       Remote SQL: SELECT c2, ctid FROM "S 1"."T 1" FOR UPDATE
!                ->  Hash
                       Output: ft1.*, ft1.c1
!                      ->  Foreign Scan on public.ft1
!                            Output: ft1.*, ft1.c1
!                            Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 2))
! (17 rows)
  
  DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2;
  SELECT c1,c2,c3,c4 FROM ft2 ORDER BY c1;
--- 4194,4209 ----
           Output: ft2.ctid, ft1.*
           Relations: (public.ft2) INNER JOIN (public.ft1)
           Remote SQL: SELECT r1.ctid, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2."C 1", r2.c2, r2.c3, r2.c4, r2.c5, r2.c6, r2.c7, r2.c8) END FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1.c2 = r2."C 1")) AND (((r2."C 1" % 10) = 2)))) FOR UPDATE OF r1
!          ->  Nested Loop
                 Output: ft2.ctid, ft1.*
!                Join Filter: (ft2.c2 = ft1.c1)
                 ->  Foreign Scan on public.ft2
                       Output: ft2.ctid, ft2.c2
                       Remote SQL: SELECT c2, ctid FROM "S 1"."T 1" FOR UPDATE
!                ->  Foreign Scan on public.ft1
                       Output: ft1.*, ft1.c1
!                      Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 2))
! (15 rows)
  
  DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2;
  SELECT c1,c2,c3,c4 FROM ft2 ORDER BY c1;
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 4365,4374 **** postgresGetForeignJoinPaths(PlannerInfo *root,
  		root->parse->commandType == CMD_UPDATE ||
  		root->rowMarks)
  	{
! 		epq_path = GetExistingLocalJoinPath(joinrel);
  		if (!epq_path)
  		{
! 			elog(DEBUG3, "could not push down foreign join because a local path suitable for EPQ checks was not found");
  			return;
  		}
  	}
--- 4365,4375 ----
  		root->parse->commandType == CMD_UPDATE ||
  		root->rowMarks)
  	{
! 		epq_path = CreateLocalJoinPath(root, joinrel, outerrel, innerrel,
! 									   jointype, extra);
  		if (!epq_path)
  		{
! 			elog(DEBUG3, "could not push down foreign join because a local path suitable for EPQ checks was not created");
  			return;
  		}
  	}
*** a/contrib/postgres_fdw/sql/postgres_fdw.sql
--- b/contrib/postgres_fdw/sql/postgres_fdw.sql
***************
*** 445,450 **** SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
--- 445,455 ----
  EXPLAIN (VERBOSE, COSTS OFF)
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
  SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
+ -- FOR UPDATE/SHARE in situations where a full join is pushed down
+ EXPLAIN (VERBOSE, COSTS OFF)
+ SELECT t1.c1, t2.c1, t3.c1 FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (ft4 t2 FULL JOIN ft5 t3 ON (t2.c1 = t3.c1)) ON (TRUE) ORDER BY t1.c1, t2.c1, t3.c1 FOR UPDATE OF t1;
+ EXPLAIN (VERBOSE, COSTS OFF)
+ SELECT t1.c1, t2.c1, t3.c1 FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (ft4 t2 FULL JOIN ft5 t3 ON (FALSE)) ON (TRUE) ORDER BY t1.c1, t2.c1, t3.c1 FOR UPDATE OF t1;
  -- join in CTE
  EXPLAIN (VERBOSE, COSTS OFF)
  WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
*** a/doc/src/sgml/fdwhandler.sgml
--- b/doc/src/sgml/fdwhandler.sgml
***************
*** 995,1007 **** RecheckForeignScan (ForeignScanState *node, TupleTableSlot *slot);
       can be executed and the resulting tuple can be stored in the slot.
       This plan need not be efficient since no base table will return more
       than one row; for example, it may implement all joins as nested loops.
!      The function <literal>GetExistingLocalJoinPath</> may be used to search
!      existing paths for a suitable local join path, which can be used as the
!      alternative local join plan.  <literal>GetExistingLocalJoinPath</>
!      searches for an unparameterized path in the path list of the specified
!      join relation.  (If it does not find such a path, it returns NULL, in
!      which case a foreign data wrapper may build the local path by itself or
!      may choose not to create access paths for that join.)
      </para>
     </sect2>
  
--- 995,1008 ----
       can be executed and the resulting tuple can be stored in the slot.
       This plan need not be efficient since no base table will return more
       than one row; for example, it may implement all joins as nested loops.
!      The function <literal>CreateLocalJoinPath</> may be used to build
!      a suitable local join path, which can be used as the alternative local
!      join plan.  <literal>CreateLocalJoinPath</> builds a nested loop join
!      path for the specified join relation, except when the join type is
!      <literal>FULL</>, in which case a merge or hash join path is built.
!      (If it does not build such a path, it returns NULL, in which case a
!      foreign data wrapper may build the local path by itself or may choose
!      not to create access paths for that join.)
      </para>
     </sect2>
  
*** a/src/backend/foreign/foreign.c
--- b/src/backend/foreign/foreign.c
***************
*** 22,27 ****
--- 22,29 ----
  #include "foreign/foreign.h"
  #include "lib/stringinfo.h"
  #include "miscadmin.h"
+ #include "optimizer/cost.h"
+ #include "optimizer/pathnode.h"
  #include "utils/builtins.h"
  #include "utils/memutils.h"
  #include "utils/rel.h"
***************
*** 693,805 **** get_foreign_server_oid(const char *servername, bool missing_ok)
  	return oid;
  }
  
  /*
!  * Get a copy of an existing local path for a given join relation.
!  *
!  * This function is usually helpful to obtain an alternate local path for EPQ
!  * checks.
!  *
!  * Right now, this function only supports unparameterized foreign joins, so we
!  * only search for unparameterized path in the given list of paths. Since we
!  * are searching for a path which can be used to construct an alternative local
!  * plan for a foreign join, we look for only MergeJoin, HashJoin or NestLoop
!  * paths.
!  *
!  * If the inner or outer subpath of the chosen path is a ForeignScan, we
!  * replace it with its outer subpath.  For this reason, and also because the
!  * planner might free the original path later, the path returned by this
!  * function is a shallow copy of the original.  There's no need to copy
!  * the substructure, so we don't.
   *
!  * Since the plan created using this path will presumably only be used to
!  * execute EPQ checks, efficiency of the path is not a concern. But since the
!  * path list in RelOptInfo is anyway sorted by total cost we are likely to
!  * choose the most efficient path, which is all for the best.
   */
! extern Path *
! GetExistingLocalJoinPath(RelOptInfo *joinrel)
  {
! 	ListCell   *lc;
! 
! 	Assert(joinrel->reloptkind == RELOPT_JOINREL);
! 
! 	foreach(lc, joinrel->pathlist)
! 	{
! 		Path	   *path = (Path *) lfirst(lc);
! 		JoinPath   *joinpath = NULL;
! 
! 		/* Skip parameterised paths. */
! 		if (path->param_info != NULL)
! 			continue;
! 
! 		switch (path->pathtype)
! 		{
! 			case T_HashJoin:
! 				{
! 					HashPath   *hash_path = makeNode(HashPath);
! 
! 					memcpy(hash_path, path, sizeof(HashPath));
! 					joinpath = (JoinPath *) hash_path;
! 				}
! 				break;
! 
! 			case T_NestLoop:
! 				{
! 					NestPath   *nest_path = makeNode(NestPath);
! 
! 					memcpy(nest_path, path, sizeof(NestPath));
! 					joinpath = (JoinPath *) nest_path;
! 				}
! 				break;
  
! 			case T_MergeJoin:
! 				{
! 					MergePath  *merge_path = makeNode(MergePath);
  
! 					memcpy(merge_path, path, sizeof(MergePath));
! 					joinpath = (JoinPath *) merge_path;
! 				}
! 				break;
! 
! 			default:
! 
! 				/*
! 				 * Just skip anything else. We don't know if corresponding
! 				 * plan would build the output row from whole-row references
! 				 * of base relations and execute the EPQ checks.
! 				 */
! 				break;
! 		}
! 
! 		/* This path isn't good for us, check next. */
! 		if (!joinpath)
! 			continue;
  
  		/*
! 		 * If either inner or outer path is a ForeignPath corresponding to a
! 		 * pushed down join, replace it with the fdw_outerpath, so that we
! 		 * maintain path for EPQ checks built entirely of local join
! 		 * strategies.
  		 */
! 		if (IsA(joinpath->outerjoinpath, ForeignPath))
  		{
! 			ForeignPath *foreign_path;
  
! 			foreign_path = (ForeignPath *) joinpath->outerjoinpath;
! 			if (foreign_path->path.parent->reloptkind == RELOPT_JOINREL)
! 				joinpath->outerjoinpath = foreign_path->fdw_outerpath;
  		}
! 
! 		if (IsA(joinpath->innerjoinpath, ForeignPath))
  		{
! 			ForeignPath *foreign_path;
  
! 			foreign_path = (ForeignPath *) joinpath->innerjoinpath;
! 			if (foreign_path->path.parent->reloptkind == RELOPT_JOINREL)
! 				joinpath->innerjoinpath = foreign_path->fdw_outerpath;
  		}
  
! 		return (Path *) joinpath;
  	}
! 	return NULL;
  }
--- 695,806 ----
  	return oid;
  }
  
+ 
  /*
!  * Generate a local join path for a foreign join
   *
!  * Note: currently, this function only supports unparameterized foreign joins.
   */
! Path *
! CreateLocalJoinPath(PlannerInfo *root,
! 					RelOptInfo *joinrel,
! 					RelOptInfo *outerrel,
! 					RelOptInfo *innerrel,
! 					JoinType jointype,
! 					JoinPathExtraData *extra)
  {
! 	JoinCostWorkspace workspace;
! 	Path	   *outer_path = outerrel->cheapest_total_path;
! 	Path	   *inner_path = innerrel->cheapest_total_path;
! 	Path	   *result;
  
! 	/* Should only be called in these cases. */
! 	Assert(jointype == JOIN_INNER ||
! 		   jointype == JOIN_LEFT ||
! 		   jointype == JOIN_FULL ||
! 		   jointype == JOIN_SEMI ||
! 		   jointype == JOIN_ANTI);
  
! 	/* Give up if the cheapest-total-cost paths are parameterized. */
! 	if (!bms_is_empty(PATH_REQ_OUTER(outer_path)) ||
! 		!bms_is_empty(PATH_REQ_OUTER(inner_path)))
! 		return NULL;
  
+ 	/*
+ 	 * If the jointype is JOIN_FULL, try to create a hashjoin join path from
+ 	 * the cheapest-total-cost paths; otherwise, create a nestloop join path.
+ 	 */
+ 	if (jointype == JOIN_FULL)
+ 	{
  		/*
! 		 * Special corner case: for "x FULL JOIN y ON true", there will be no
! 		 * join clauses at all.  Note that mergejoin is our only join type
! 		 * that supports FULL JOIN without any join clauses, and in that case
! 		 * it doesn't require the input paths to be well ordered, so generate
! 		 * a clauseless mergejoin path from the cheapest-total-cost paths.
  		 */
! 		if (extra->mergejoin_allowed && !extra->mergeclause_list)
  		{
! 			/* Get the initial cost estimates. */
! 			initial_cost_mergejoin(root, &workspace, jointype, NIL,
! 								   outer_path, inner_path, NIL, NIL,
! 								   extra->sjinfo);
  
! 			/* Generate a mergejoin join path. */
! 			result = (Path *) create_mergejoin_path(root,
! 													joinrel,
! 													jointype,
! 													&workspace,
! 													extra->sjinfo,
! 													outer_path,
! 													inner_path,
! 													extra->restrictlist,
! 													NIL, NULL,
! 													NIL, NIL, NIL);
  		}
! 		else if (extra->hashclause_list)
  		{
! 			/* Get the initial cost estimates. */
! 			initial_cost_hashjoin(root, &workspace, jointype,
! 								  extra->hashclause_list,
! 								  outer_path, inner_path,
! 								  extra->sjinfo, &extra->semifactors);
  
! 			/* Generate a hashjoin join path. */
! 			result = (Path *) create_hashjoin_path(root,
! 												   joinrel,
! 												   jointype,
! 												   &workspace,
! 												   extra->sjinfo,
! 												   &extra->semifactors,
! 												   outer_path,
! 												   inner_path,
! 												   extra->restrictlist,
! 												   NULL,
! 												   extra->hashclause_list);
  		}
+ 		else
+ 			result = NULL;
+ 	}
+ 	else
+ 	{
+ 		/* Get the initial cost estimates. */
+ 		initial_cost_nestloop(root, &workspace, jointype,
+ 						  outer_path, inner_path,
+ 						  extra->sjinfo, &extra->semifactors);
  
! 		/* Generate a nestloop join path. */
! 		result = (Path *) create_nestloop_path(root,
! 											   joinrel,
! 											   jointype,
! 											   &workspace,
! 											   extra->sjinfo,
! 											   &extra->semifactors,
! 											   outer_path,
! 											   inner_path,
! 											   extra->restrictlist,
! 											   NIL, NULL);
  	}
! 
! 	return result;
  }
*** a/src/backend/optimizer/path/joinpath.c
--- b/src/backend/optimizer/path/joinpath.c
***************
*** 90,95 **** add_paths_to_joinrel(PlannerInfo *root,
--- 90,97 ----
  
  	extra.restrictlist = restrictlist;
  	extra.mergeclause_list = NIL;
+ 	extra.hashclause_list = NIL;
+ 	extra.mergejoin_allowed = false;
  	extra.sjinfo = sjinfo;
  	extra.param_source_rels = NULL;
  
***************
*** 100,105 **** add_paths_to_joinrel(PlannerInfo *root,
--- 102,108 ----
  	 * it's a full join.
  	 */
  	if (enable_mergejoin || jointype == JOIN_FULL)
+ 	{
  		extra.mergeclause_list = select_mergejoin_clauses(root,
  														  joinrel,
  														  outerrel,
***************
*** 107,112 **** add_paths_to_joinrel(PlannerInfo *root,
--- 110,117 ----
  														  restrictlist,
  														  jointype,
  														  &mergejoin_allowed);
+ 		extra.mergejoin_allowed = mergejoin_allowed;
+ 	}
  
  	/*
  	 * If it's SEMI or ANTI join, compute correction factors for cost
***************
*** 1320,1325 **** hash_inner_and_outer(PlannerInfo *root,
--- 1325,1333 ----
  		hashclauses = lappend(hashclauses, restrictinfo);
  	}
  
+ 	/* Save hashclauses for possible use by the FDW */
+ 	extra->hashclause_list = hashclauses;
+ 
  	/* If we found any usable hashclauses, make paths */
  	if (hashclauses)
  	{
*** a/src/include/foreign/fdwapi.h
--- b/src/include/foreign/fdwapi.h
***************
*** 235,240 **** extern FdwRoutine *GetFdwRoutineByRelId(Oid relid);
  extern FdwRoutine *GetFdwRoutineForRelation(Relation relation, bool makecopy);
  extern bool IsImportableForeignTable(const char *tablename,
  						 ImportForeignSchemaStmt *stmt);
! extern Path *GetExistingLocalJoinPath(RelOptInfo *joinrel);
  
  #endif   /* FDWAPI_H */
--- 235,242 ----
  extern FdwRoutine *GetFdwRoutineForRelation(Relation relation, bool makecopy);
  extern bool IsImportableForeignTable(const char *tablename,
  						 ImportForeignSchemaStmt *stmt);
! extern Path *CreateLocalJoinPath(PlannerInfo *root, RelOptInfo *joinrel,
! 					RelOptInfo *outerrel, RelOptInfo *innerrel,
! 					JoinType jointype, JoinPathExtraData *extra);
  
  #endif   /* FDWAPI_H */
*** a/src/include/nodes/relation.h
--- b/src/include/nodes/relation.h
***************
*** 2006,2011 **** typedef struct SemiAntiJoinFactors
--- 2006,2014 ----
   *		clauses that apply to this join
   * mergeclause_list is a list of RestrictInfo nodes for available
   *		mergejoin clauses in this join
+  * hashclause_list is a list of RestrictInfo nodes for available
+  *		hashjoin clauses in this join
+  * mergejoin_allowed is a flag to indicate whether mergejoins are allowed
   * sjinfo is extra info about special joins for selectivity estimation
   * semifactors is as shown above (only valid for SEMI or ANTI joins)
   * param_source_rels are OK targets for parameterization of result paths
***************
*** 2014,2019 **** typedef struct JoinPathExtraData
--- 2017,2024 ----
  {
  	List	   *restrictlist;
  	List	   *mergeclause_list;
+ 	List	   *hashclause_list;
+ 	bool		mergejoin_allowed;
  	SpecialJoinInfo *sjinfo;
  	SemiAntiJoinFactors semifactors;
  	Relids		param_source_rels;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to