From afaa0ccea09fddf7bd1114d0aaba9febaff3b245 Mon Sep 17 00:00:00 2001
From: "dgrowley@gmail.com" <dgrowley@gmail.com>
Date: Sun, 4 Mar 2018 21:15:57 +1300
Subject: [PATCH v2 1/2] Disallow LEFT JOIN removal when join or base quals
 have volatile functions

Removing joins in such cases can cause volatile functions not to be executed
at all.
---
 src/backend/optimizer/plan/analyzejoins.c | 18 ++++++++++++++++
 src/test/regress/expected/join.out        | 34 +++++++++++++++++++++++++++++++
 src/test/regress/sql/join.sql             | 17 ++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index ef25fefa455..40f284b29c9 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -28,6 +28,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "utils/lsyscache.h"
@@ -236,6 +237,16 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			return false;		/* it does reference innerrel */
 	}
 
+	/*
+	 * Join removal must be disallowed if the baserestrictinfos contain any
+	 * volatile functions.  If we removed this join then there's no chance of
+	 * these functions being executed and some side affects of these may be
+	 * required.
+	 */
+	if (contain_volatile_functions((Node *)
+				extract_actual_clauses(innerrel->baserestrictinfo, false)))
+		return false;
+
 	/*
 	 * Search for mergejoinable clauses that constrain the inner rel against
 	 * either the outer rel or a pseudoconstant.  If an operator is
@@ -267,6 +278,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			continue;			/* else, ignore; not useful here */
 		}
 
+		/*
+		 * Disallow the join removal completely if the join clause contains
+		 * any volatile functions.
+		 */
+		if (contain_volatile_functions((Node *) restrictinfo->clause))
+			return false;
+
 		/* Ignore if it's not a mergejoinable clause */
 		if (!restrictinfo->can_join ||
 			restrictinfo->mergeopfamilies == NIL)
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 4d5931d67ef..76f31fad103 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4053,6 +4053,12 @@ INSERT INTO a VALUES (0, 0), (1, NULL);
 INSERT INTO b VALUES (0, 0), (1, NULL);
 INSERT INTO c VALUES (0), (1);
 INSERT INTO d VALUES (1,3), (2,2), (3,1);
+CREATE OR REPLACE FUNCTION notice(pn INT) RETURNS INT AS $$
+BEGIN
+	RAISE NOTICE '%', pn;
+	RETURN pn;
+END;
+$$ VOLATILE LANGUAGE plpgsql;
 -- all three cases should be optimizable into a simple seqscan
 explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
   QUERY PLAN   
@@ -4162,6 +4168,34 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4
  Seq Scan on int8_tbl i8
 (1 row)
 
+-- ensure the join removal is disallowed when there are any volatile functions
+-- in the base clauses to the removal rel.
+explain (costs off)
+select a.* from a left join b on a.b_id = b.id and notice(b.c_id) = 1;
+             QUERY PLAN             
+------------------------------------
+ Hash Right Join
+   Hash Cond: (b.id = a.b_id)
+   ->  Seq Scan on b
+         Filter: (notice(c_id) = 1)
+   ->  Hash
+         ->  Seq Scan on a
+(6 rows)
+
+-- ensure the join removal is disallowed when there are any volatile functions
+-- in the join clauses.
+explain (costs off)
+select a.* from a left join b on a.b_id = b.id and a.b_id = notice(b.id);
+               QUERY PLAN               
+----------------------------------------
+ Hash Left Join
+   Hash Cond: (a.b_id = b.id)
+   Join Filter: (a.b_id = notice(b.id))
+   ->  Seq Scan on a
+   ->  Hash
+         ->  Seq Scan on b
+(6 rows)
+
 -- check join removal with lateral references
 explain (costs off)
 select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 30dfde223ef..4be64eec293 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1328,6 +1328,13 @@ INSERT INTO b VALUES (0, 0), (1, NULL);
 INSERT INTO c VALUES (0), (1);
 INSERT INTO d VALUES (1,3), (2,2), (3,1);
 
+CREATE OR REPLACE FUNCTION notice(pn INT) RETURNS INT AS $$
+BEGIN
+	RAISE NOTICE '%', pn;
+	RETURN pn;
+END;
+$$ VOLATILE LANGUAGE plpgsql;
+
 -- all three cases should be optimizable into a simple seqscan
 explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
 explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
@@ -1376,6 +1383,16 @@ explain (costs off)
 select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4
   on i8.q1 = i4.f1;
 
+-- ensure the join removal is disallowed when there are any volatile functions
+-- in the base clauses to the removal rel.
+explain (costs off)
+select a.* from a left join b on a.b_id = b.id and notice(b.c_id) = 1;
+
+-- ensure the join removal is disallowed when there are any volatile functions
+-- in the join clauses.
+explain (costs off)
+select a.* from a left join b on a.b_id = b.id and a.b_id = notice(b.id);
+
 -- check join removal with lateral references
 explain (costs off)
 select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
-- 
2.16.2.windows.1

