From 2d069323ed8c3e5283093a5ed91069df467487f3 Mon Sep 17 00:00:00 2001
From: amit <amitlangote09@gmail.com>
Date: Thu, 18 Jul 2019 10:33:20 +0900
Subject: [PATCH v2 2/3] Fix partitionwise join to handle FULL JOINs correctly

---
 src/backend/optimizer/util/relnode.c         |  86 +++++++++---
 src/test/regress/expected/partition_join.out | 200 +++++++++++++++++++++++++++
 src/test/regress/sql/partition_join.sql      |  29 ++++
 3 files changed, 299 insertions(+), 16 deletions(-)

diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index f21ec9bdfc..9ec1bacaff 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -64,6 +64,7 @@ static bool have_partkey_equi_join(RelOptInfo *joinrel,
 					   JoinType jointype, List *restrictlist);
 static int match_join_arg_to_partition_keys(Expr *expr, RelOptInfo *rel,
 					bool strict_op);
+static List *extract_coalesce_args(Expr *expr);
 static void set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
 								RelOptInfo *outer_rel, RelOptInfo *inner_rel,
 								JoinType jointype);
@@ -1821,6 +1822,8 @@ static int
 match_join_arg_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
 {
 	int			cnt;
+	int			matched = -1;
+	List	   *nullable_exprs;
 
 	/* This function should be called only for partitioned relations. */
 	Assert(rel->part_scheme);
@@ -1829,36 +1832,87 @@ match_join_arg_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
 	while (IsA(expr, RelabelType))
 		expr = (Expr *) (castNode(RelabelType, expr))->arg;
 
+	/*
+	 * Extract the arguments from possibly nested COALESCE expressions.  Each
+	 * of these arguments could be null when joining, so these expressions are
+	 * called as such and are to be matched only with the nullable partition
+	 * keys.
+	 */
+	if (IsA(expr, CoalesceExpr))
+		nullable_exprs = extract_coalesce_args(expr);
+	else
+		/*
+		 * expr may or may not be nullable but add to the list anyway to
+		 * simplify the coding below.
+		 */
+		nullable_exprs = list_make1(expr);
+
 	for (cnt = 0; cnt < rel->part_scheme->partnatts; cnt++)
 	{
-		ListCell   *lc;
-
 		Assert(rel->partexprs);
-		foreach(lc, rel->partexprs[cnt])
+
+		/* Is the expression one of the non-nullable partition keys? */
+		if (list_member(rel->partexprs[cnt], expr))
 		{
-			if (equal(lfirst(lc), expr))
-				return cnt;
+			matched = cnt;
+			break;
 		}
 
+		/*
+		 * Nope, so check if it is one of the nullable keys.  Allowing
+		 * nullable keys won't work if the join operator is not strict,
+		 * because null partition keys may then join with rows from other
+		 * partitions.  XXX - would that ever be true if the operator is
+		 * already determined to be mergejoin- and hashjoin-able?
+		 */
 		if (!strict_op)
 			continue;
 
-		/*
-		 * If it's a strict equi-join a NULL partition key on one side will
-		 * not join a NULL partition key on the other side. So, rows with NULL
-		 * partition key from a partition on one side can not join with those
-		 * from a non-matching partition on the other side. So, search the
-		 * nullable partition keys as well.
-		 */
+		/* OK to match with nullable keys. */
 		Assert(rel->nullable_partexprs);
-		foreach(lc, rel->nullable_partexprs[cnt])
+		if (list_intersection(rel->nullable_partexprs[cnt],
+							  nullable_exprs) != NIL)
 		{
-			if (equal(lfirst(lc), expr))
-				return cnt;
+			matched = cnt;
+			break;
 		}
 	}
 
-	return -1;
+	Assert(list_length(nullable_exprs) >= 1);
+	list_free(nullable_exprs);
+
+	return matched;
+}
+
+/*
+ * extract_coalesce_args
+ *		Extract all arguments from arbitrarily nested CoalesceExpr's
+ *
+ * Note: caller should free the List structure when done using it.
+ */
+static List *
+extract_coalesce_args(Expr *expr)
+{
+	List   *coalesce_args = NIL;
+
+	while (expr && IsA(expr, CoalesceExpr))
+	{
+		CoalesceExpr *cexpr = (CoalesceExpr *) expr;
+		ListCell *lc;
+
+		expr = NULL;
+		foreach(lc, cexpr->args)
+		{
+			if (IsA(lfirst(lc), CoalesceExpr))
+				expr = lfirst(lc);
+			else
+				coalesce_args = lappend(coalesce_args, lfirst(lc));
+		}
+
+		Assert(expr == NULL || IsA(expr, CoalesceExpr));
+	}
+
+	return coalesce_args;
 }
 
 /*
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 1296edcdae..885f754f10 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -2003,3 +2003,203 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b =
                            Filter: (b = 0)
 (16 rows)
 
+-- N-way join consisting of 2 or more full joins
+DROP TABLE prt1_n_p2;
+CREATE TABLE prt1_n_p2 PARTITION OF prt1_n FOR VALUES FROM ('0250') TO ('0500') PARTITION BY RANGE (c);
+CREATE TABLE prt1_n_p2_1 PARTITION OF prt1_n_p2 FOR VALUES FROM ('0250') TO ('0350');
+CREATE TABLE prt1_n_p2_2 PARTITION OF prt1_n_p2 FOR VALUES FROM ('0350') TO ('0500');
+INSERT INTO prt1_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(250, 499, 2) i;
+ANALYZE prt1_n;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 FULL JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+                                              QUERY PLAN                                              
+------------------------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: (COALESCE(COALESCE(COALESCE(t1.c, t2.c), t3.c), t4.c))
+         ->  Append
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(COALESCE(t1.c, t2.c), t3.c))::text = (t4.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((COALESCE(t1.c, t2.c))::text = (t3.c)::text)
+                           ->  Hash Full Join
+                                 Hash Cond: ((t1.c)::text = (t2.c)::text)
+                                 ->  Seq Scan on prt1_n_p1 t1
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p1 t2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p1 t3
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p1 t4
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(COALESCE(t1_1.c, t2_1.c), t3_1.c))::text = (t4_1.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((COALESCE(t1_1.c, t2_1.c))::text = (t3_1.c)::text)
+                           ->  Hash Full Join
+                                 Hash Cond: ((t1_1.c)::text = (t2_1.c)::text)
+                                 ->  Seq Scan on prt1_n_p2_1 t1_1
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p2_1 t2_1
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p2_1 t3_1
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p2_1 t4_1
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(COALESCE(t1_2.c, t2_2.c), t3_2.c))::text = (t4_2.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((COALESCE(t1_2.c, t2_2.c))::text = (t3_2.c)::text)
+                           ->  Hash Full Join
+                                 Hash Cond: ((t1_2.c)::text = (t2_2.c)::text)
+                                 ->  Seq Scan on prt1_n_p2_2 t1_2
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p2_2 t2_2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p2_2 t3_2
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p2_2 t4_2
+(43 rows)
+
+SELECT * FROM prt1_n t1 FULL JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+  c   | a | b | a | b | a | b | a | b 
+------+---+---+---+---+---+---+---+---
+ 0000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0
+ 0002 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2
+ 0004 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4
+ 0006 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6
+ 0008 | 8 | 8 | 8 | 8 | 8 | 8 | 8 | 8
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 LEFT JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+                                     QUERY PLAN                                     
+------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: (COALESCE(COALESCE(t1.c, t3.c), t4.c))
+         ->  Append
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(t1.c, t3.c))::text = (t4.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((t1.c)::text = (t3.c)::text)
+                           ->  Hash Left Join
+                                 Hash Cond: ((t1.c)::text = (t2.c)::text)
+                                 ->  Seq Scan on prt1_n_p1 t1
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p1 t2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p1 t3
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p1 t4
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(t1_1.c, t3_1.c))::text = (t4_1.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((t1_1.c)::text = (t3_1.c)::text)
+                           ->  Hash Left Join
+                                 Hash Cond: ((t1_1.c)::text = (t2_1.c)::text)
+                                 ->  Seq Scan on prt1_n_p2_1 t1_1
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p2_1 t2_1
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p2_1 t3_1
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p2_1 t4_1
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(t1_2.c, t3_2.c))::text = (t4_2.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((t1_2.c)::text = (t3_2.c)::text)
+                           ->  Hash Left Join
+                                 Hash Cond: ((t1_2.c)::text = (t2_2.c)::text)
+                                 ->  Seq Scan on prt1_n_p2_2 t1_2
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p2_2 t2_2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p2_2 t3_2
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p2_2 t4_2
+(43 rows)
+
+SELECT * FROM prt1_n t1 LEFT JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+  c   | a | b | a | b | a | b | a | b 
+------+---+---+---+---+---+---+---+---
+ 0000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0
+ 0002 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2
+ 0004 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4
+ 0006 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6
+ 0008 | 8 | 8 | 8 | 8 | 8 | 8 | 8 | 8
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 INNER JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+                                     QUERY PLAN                                     
+------------------------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: (COALESCE(COALESCE(t1.c, t3.c), t4.c))
+         ->  Append
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(t1.c, t3.c))::text = (t4.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((t1.c)::text = (t3.c)::text)
+                           ->  Hash Join
+                                 Hash Cond: ((t1.c)::text = (t2.c)::text)
+                                 ->  Seq Scan on prt1_n_p1 t1
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p1 t2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p1 t3
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p1 t4
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(t1_1.c, t3_1.c))::text = (t4_1.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((t1_1.c)::text = (t3_1.c)::text)
+                           ->  Hash Join
+                                 Hash Cond: ((t1_1.c)::text = (t2_1.c)::text)
+                                 ->  Seq Scan on prt1_n_p2_1 t1_1
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p2_1 t2_1
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p2_1 t3_1
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p2_1 t4_1
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(t1_2.c, t3_2.c))::text = (t4_2.c)::text)
+                     ->  Hash Full Join
+                           Hash Cond: ((t1_2.c)::text = (t3_2.c)::text)
+                           ->  Hash Join
+                                 Hash Cond: ((t1_2.c)::text = (t2_2.c)::text)
+                                 ->  Seq Scan on prt1_n_p2_2 t1_2
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_n_p2_2 t2_2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_n_p2_2 t3_2
+                     ->  Hash
+                           ->  Seq Scan on prt1_n_p2_2 t4_2
+(43 rows)
+
+SELECT * FROM prt1_n t1 INNER JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+  c   | a | b | a | b | a | b | a | b 
+------+---+---+---+---+---+---+---+---
+ 0000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0
+ 0002 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2
+ 0004 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4
+ 0006 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6
+ 0008 | 8 | 8 | 8 | 8 | 8 | 8 | 8 | 8
+(5 rows)
+
+SET enable_hashjoin TO off;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 FULL JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+ERROR:  could not find pathkey item to sort
+SELECT * FROM prt1_n t1 FULL JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+ERROR:  could not find pathkey item to sort
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 LEFT JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+ERROR:  could not find pathkey item to sort
+SELECT * FROM prt1_n t1 LEFT JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+ERROR:  could not find pathkey item to sort
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 INNER JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+ERROR:  could not find pathkey item to sort
+SELECT * FROM prt1_n t1 INNER JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+ERROR:  could not find pathkey item to sort
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
index db9a6b4a96..97ec983cec 100644
--- a/src/test/regress/sql/partition_join.sql
+++ b/src/test/regress/sql/partition_join.sql
@@ -270,6 +270,7 @@ EXPLAIN (COSTS OFF)
 SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.b = t2.b AND t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
 SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.b = t2.b AND t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
 
+
 -- test default partition behavior for range
 ALTER TABLE prt1 DETACH PARTITION prt1_p3;
 ALTER TABLE prt1 ATTACH PARTITION prt1_p3 DEFAULT;
@@ -435,3 +436,31 @@ ANALYZE prt2;
 
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- N-way join consisting of 2 or more full joins
+DROP TABLE prt1_n_p2;
+CREATE TABLE prt1_n_p2 PARTITION OF prt1_n FOR VALUES FROM ('0250') TO ('0500') PARTITION BY RANGE (c);
+CREATE TABLE prt1_n_p2_1 PARTITION OF prt1_n_p2 FOR VALUES FROM ('0250') TO ('0350');
+CREATE TABLE prt1_n_p2_2 PARTITION OF prt1_n_p2 FOR VALUES FROM ('0350') TO ('0500');
+INSERT INTO prt1_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(250, 499, 2) i;
+ANALYZE prt1_n;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 FULL JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+SELECT * FROM prt1_n t1 FULL JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 LEFT JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+SELECT * FROM prt1_n t1 LEFT JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 INNER JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+SELECT * FROM prt1_n t1 INNER JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+SET enable_hashjoin TO off;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 FULL JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+SELECT * FROM prt1_n t1 FULL JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 LEFT JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+SELECT * FROM prt1_n t1 LEFT JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_n t1 INNER JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
+SELECT * FROM prt1_n t1 INNER JOIN prt1_n t2 USING (c) FULL JOIN prt1_n t3 USING (c) FULL JOIN prt1_n t4 USING (c) ORDER BY c LIMIT 5;
-- 
2.11.0

