On 9/13/22 16:40, Andrey Lepikhov wrote:
On 5/9/2022 12:22, Richard Guo wrote:
On Fri, Sep 2, 2022 at 7:09 PM Andrey Lepikhov <a.lepik...@postgrespro.ru <mailto:a.lepik...@postgrespro.ru>> wrote:
To resolve both issues, lower outer join passes through pull_sublinks_* into flattening routine (see attachment).
I've added these cases into subselect.sql
In attachment - new version of the patch, rebased onto current master.

--
Regards
Andrey Lepikhov
Postgres Professional
From 6462578f8789cb831f45ebfad65308d6afabb833 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepik...@postgrespro.ru>
Date: Wed, 28 Sep 2022 13:42:12 +0300
Subject: [PATCH] Transform correlated subquery of type N-J [1] into ordinary
 join query. With many restrictions, check each subquery and pull up
 expressions, references upper query block. Works for operators '=' and 'IN'.
 Machinery of converting ANY subquery to JOIN stays the same with minor
 changes in walker function.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pass a lower outer join through the pullup_sublink recursive procedure to find
out some situations when qual references outer side of an outer join.

[1] Kim, Won. “On optimizing an SQL-like nested query.” ACM Trans. Database Syst. 7 (1982): 443-469.
---
 src/backend/optimizer/plan/subselect.c    | 320 +++++++++++++++-
 src/backend/optimizer/prep/prepjointree.c |  71 ++--
 src/backend/optimizer/util/tlist.c        |   2 +-
 src/backend/optimizer/util/var.c          |   8 +
 src/backend/utils/misc/guc_tables.c       |  10 +
 src/include/optimizer/optimizer.h         |   1 +
 src/include/optimizer/subselect.h         |   3 +-
 src/include/optimizer/tlist.h             |   1 +
 src/test/regress/expected/prepare.out     |  18 +
 src/test/regress/expected/subselect.out   | 438 ++++++++++++++++++++++
 src/test/regress/sql/prepare.sql          |   7 +
 src/test/regress/sql/subselect.sql        | 162 ++++++++
 12 files changed, 1004 insertions(+), 37 deletions(-)

diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 92e3338584..29da43bb4d 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -32,6 +32,7 @@
 #include "optimizer/planner.h"
 #include "optimizer/prep.h"
 #include "optimizer/subselect.h"
+#include "optimizer/tlist.h"
 #include "parser/parse_relation.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/builtins.h"
@@ -65,6 +66,8 @@ typedef struct inline_cte_walker_context
 } inline_cte_walker_context;
 
 
+bool optimize_correlated_subqueries = true;
+
 static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
 						   List *plan_params,
 						   SubLinkType subLinkType, int subLinkId,
@@ -1229,6 +1232,288 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context)
 	return expression_tree_walker(node, inline_cte_walker, context);
 }
 
+static bool
+contain_placeholders(Node *node, inline_cte_walker_context *context)
+{
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Query))
+		return query_tree_walker((Query *) node, contain_placeholders, context, 0);
+	else if (IsA(node, PlaceHolderVar))
+	{
+		return true;
+	}
+
+	return expression_tree_walker(node, contain_placeholders, context);
+}
+
+/*
+ * To be pullable all clauses of flattening correlated subquery should be
+ * anded and mergejoinable (XXX: really necessary?)
+ */
+static bool
+quals_is_pullable(Node *quals)
+{
+	if (!contain_vars_of_level(quals, 1))
+		return true;
+
+	if (quals && IsA(quals, OpExpr))
+	{
+		OpExpr *expr = (OpExpr *) quals;
+		Node   *leftarg;
+
+		/* Contains only one expression */
+		leftarg = linitial(expr->args);
+		if (!op_mergejoinable(expr->opno, exprType(leftarg))) /* Is it really necessary ? */
+			return false;
+
+		if (contain_placeholders(quals, NULL))
+			return false;
+
+		return true;
+	}
+	else if (is_andclause(quals))
+	{
+		ListCell   *l;
+
+		foreach(l, ((BoolExpr *) quals)->args)
+		{
+			Node *andarg = (Node *) lfirst(l);
+
+			if (!IsA(andarg, OpExpr))
+				return false;
+			if (!quals_is_pullable(andarg))
+				return false;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
+typedef struct
+{
+	Query  *subquery;
+	int		newvarno;
+	List   *pulling_quals;
+	bool	varlevel_up;
+} correlated_t;
+
+static Node *
+pull_subquery_clauses_mutator(Node *node, correlated_t *ctx)
+{
+	if (node == NULL)
+		return NULL;
+
+	if (IsA(node, OpExpr) && !ctx->varlevel_up)
+	{
+		if (!contain_vars_of_level(node, 1))
+			return node;
+
+		/*
+		 * The expression contains links to upper relation. It will be pulled to
+		 * uplevel. All links into vars of upper levels must be changed.
+		 */
+
+		ctx->varlevel_up = true;
+		ctx->pulling_quals =
+			lappend(ctx->pulling_quals,
+					expression_tree_mutator(node,
+											pull_subquery_clauses_mutator,
+											(void *) ctx));
+		ctx->varlevel_up = false;
+
+		/* Replace position of pulled expression by the 'true' value */
+		return makeBoolConst(true, false);
+	}
+	if (IsA(node, PlaceHolderVar))
+	{
+		PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+		if (ctx->varlevel_up && phv->phlevelsup > 0)
+			phv->phlevelsup--;
+		/* fall through to recurse into argument */
+	}
+	else if (IsA(node, RangeTblEntry))
+	{
+		RangeTblEntry *rte = (RangeTblEntry *) node;
+
+		if (rte->rtekind == RTE_CTE)
+		{
+			if (rte->ctelevelsup > 0 && ctx->varlevel_up)
+				rte->ctelevelsup--;
+		}
+		return node;
+	}
+	else if (IsA(node, Aggref))
+	{
+		if (((Aggref *) node)->agglevelsup > 0 && ctx->varlevel_up)
+			((Aggref *) node)->agglevelsup--;
+		return node;
+	}
+	else if (IsA(node, GroupingFunc))
+	{
+		if (((GroupingFunc *) node)->agglevelsup > 0 && ctx->varlevel_up)
+			((GroupingFunc *) node)->agglevelsup--;
+		return node;
+	}
+	else if (IsA(node, Var))
+	{
+		Var *var = (Var *) node;
+
+		Assert(ctx->varlevel_up);
+
+		/* An upper relation variable */
+		if (var->varlevelsup > 0)
+		{
+			/*
+			 * Isn't needed to copy node or change varno because it correctly
+			 * refers to Table Entry of a parent and already removed from
+			 * the subquery clauses list.
+			 */
+			var->varlevelsup--;
+
+			return (Node *) var;
+		}
+		else
+		{
+			Var			  *newvar;
+			TargetEntry   *tle;
+
+			/*
+			 * The var refers to subquery table entry. Include a copy the var
+			 * into the target list, if necessary. Arrange varattno of the
+			 * new var of upper relation with a link to this entry.
+			 */
+
+			/* Create a var for usage in upper relation */
+			newvar = (Var *) copyObject(node);
+
+			/* Does the var already exists in the target list? */
+			tle = tlist_member_match_var(var, ctx->subquery->targetList);
+
+			if (tle == NULL)
+			{
+				int resno = list_length(ctx->subquery->targetList) + 1;
+
+				/*
+				 * Target list of the subquery doesn't contain this var. Add it
+				 * into the end of the target list and correct the link
+				 * XXX: Maybe choose real colname here?
+				 */
+				tle = makeTargetEntry((Expr *) var, resno, "rescol", false);
+				ctx->subquery->targetList = lappend(ctx->subquery->targetList,
+													tle);
+			}
+			else
+			{
+				if (tle->resjunk)
+				{
+					/*
+					 * Target entry exists but used as an utility entry
+					 * (for grouping, as an example). So, revert its status to
+					 * a full valued entry.
+					 */
+					tle->resjunk = false;
+					tle->resname = pstrdup("resjunkcol");
+				}
+			}
+
+			/*
+			 * Set the new var to refer newly created RangeTblEntry in the upper
+			 * query and varattno to refer at specific position in the target
+			 * list.
+			 */
+			newvar->varno = ctx->newvarno;
+			newvar->varattno = tle->resno;
+
+			return (Node *) newvar;
+		}
+	}
+	if (IsA(node, Query))
+		return (Node *) query_tree_mutator((Query *) node,
+										   pull_subquery_clauses_mutator,
+										   (void *) ctx, 0);
+
+	return expression_tree_mutator(node, pull_subquery_clauses_mutator,
+								   (void *) ctx);
+}
+
+static List *
+pull_correlated_clauses(PlannerInfo *root, SubLink *sublink,
+						JoinExpr *lowest_outer_join)
+{
+	Query		   *parse = root->parse;
+	Query		   *subselect = (Query *) sublink->subselect;
+	FromExpr	   *f;
+	correlated_t	ctx = {.subquery = subselect,
+						   .newvarno = list_length(parse->rtable) + 1, /* Looks like a hack */
+						   .pulling_quals = NIL,
+						   .varlevel_up = false};
+	Relids		safe_upper_varnos = NULL;
+
+	Assert(IsA(subselect, Query));
+
+	/* Use only for correlated candidates, just for optimal usage */
+	Assert(contain_vars_of_level((Node *) subselect, 1));
+
+	if (!optimize_correlated_subqueries ||
+		subselect->hasAggs ||
+		subselect->hasWindowFuncs ||
+		subselect->hasForUpdate || /* Pulling of clauses can change a number of tuples which subselect returns. */
+		subselect->hasRowSecurity /* Just because of paranoid safety */
+		)
+		/* The feature is switched off. */
+		return NULL;
+
+	/*
+	 * We pull up quals and arrange variable levels for expressions in WHERE
+	 * section only. So, cut the optimization off if an upper relation links
+	 * from another parts of the subquery are detected.
+	 */
+	if (contain_vars_of_level((Node *) subselect->cteList, 1) ||
+		/* see comments in subselect.sql */
+		contain_vars_of_level((Node *) subselect->rtable, 1) ||
+		contain_vars_of_level((Node *) subselect->targetList, 1) ||
+		contain_vars_of_level((Node *) subselect->returningList, 1) ||
+		contain_vars_of_level((Node *) subselect->groupingSets, 1) ||
+		contain_vars_of_level((Node *) subselect->distinctClause, 1) ||
+		contain_vars_of_level((Node *) subselect->sortClause, 1) ||
+		contain_vars_of_level((Node *) subselect->limitOffset, 1) ||
+		contain_vars_of_level((Node *) subselect->limitCount, 1) ||
+		contain_vars_of_level((Node *) subselect->rowMarks, 1) ||
+		contain_vars_of_level((Node *) subselect->havingQual, 1) ||
+		contain_vars_of_level((Node *) subselect->groupClause, 1))
+		return NULL;
+
+	f = subselect->jointree;
+
+	if (!f || !f->quals || !quals_is_pullable(f->quals))
+		return NULL;
+
+	if (lowest_outer_join)
+		safe_upper_varnos = get_relids_in_jointree(
+				(lowest_outer_join->jointype == JOIN_RIGHT) ?
+					lowest_outer_join->larg : lowest_outer_join->rarg, true);
+
+	if (safe_upper_varnos &&
+		!bms_is_subset(pull_varnos_of_level(root, f->quals, 1),
+					   safe_upper_varnos))
+		return NULL;
+
+	/*
+	 * Now, is proved that it is possible to pull up expressions with variables
+	 * from the upper query.
+	 * Pull up quals, containing correlated expressions. Replace its
+	 * positions with a true boolean expression.
+	 * It would be removed on a next planning stage.
+	 */
+	f->quals = pull_subquery_clauses_mutator(f->quals, (void *) &ctx);
+
+	return ctx.pulling_quals;
+}
 
 /*
  * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
@@ -1266,7 +1551,7 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context)
  */
 JoinExpr *
 convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
-							Relids available_rels)
+							Relids available_rels, JoinExpr *lowest_outer_join)
 {
 	JoinExpr   *result;
 	Query	   *parse = root->parse;
@@ -1279,16 +1564,10 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	List	   *subquery_vars;
 	Node	   *quals;
 	ParseState *pstate;
+	List	   *pclauses = NIL;
 
 	Assert(sublink->subLinkType == ANY_SUBLINK);
 
-	/*
-	 * The sub-select must not refer to any Vars of the parent query. (Vars of
-	 * higher levels should be okay, though.)
-	 */
-	if (contain_vars_of_level((Node *) subselect, 1))
-		return NULL;
-
 	/*
 	 * The test expression must contain some Vars of the parent query, else
 	 * it's not gonna be a join.  (Note that it won't have Vars referring to
@@ -1310,6 +1589,17 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	if (contain_volatile_functions(sublink->testexpr))
 		return NULL;
 
+	/*
+	 * The sub-select must not refer to any Vars of the parent query. (Vars of
+	 * higher levels should be okay, though.)
+	 * In the case of correlated subquery, jointree quals structure will be
+	 * modified: expressions with variables from upper query moves to the
+	 * pulled_clauses list, their places in the quals replaces by "true" value.
+	 */
+	if (contain_vars_of_level((Node *) subselect, 1) &&
+		(pclauses = pull_correlated_clauses(root, sublink, lowest_outer_join)) == NIL)
+		return NULL;
+
 	/* Create a dummy ParseState for addRangeTableEntryForSubquery */
 	pstate = make_parsestate(NULL);
 
@@ -1348,6 +1638,20 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	 */
 	quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
 
+	/* Nested subquery with references to upper level relation. */
+	if (pclauses != NIL)
+	{
+		/* Add clauses, pulled from subquery into WHERE section of the parent. */
+		if (IsA(quals, BoolExpr))
+		{
+			BoolExpr *b = (BoolExpr *) quals;
+			b->args = list_concat(b->args, pclauses);
+		}
+		else
+			quals = (Node *) make_andclause(
+									list_concat(list_make1(quals), pclauses));
+	}
+
 	/*
 	 * And finally, build the JoinExpr node.
 	 */
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 41c7066d90..f72b8b1320 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -62,10 +62,12 @@ typedef struct reduce_outer_joins_state
 } reduce_outer_joins_state;
 
 static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
-											   Relids *relids);
+											   Relids *relids,
+											   JoinExpr *lowest_outer_join);
 static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 										   Node **jtlink1, Relids available_rels1,
-										   Node **jtlink2, Relids available_rels2);
+										   Node **jtlink2, Relids available_rels2,
+										   JoinExpr *lowest_outer_join);
 static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
 										JoinExpr *lowest_outer_join,
 										JoinExpr *lowest_nulling_outer_join,
@@ -293,7 +295,7 @@ pull_up_sublinks(PlannerInfo *root)
 	/* Begin recursion through the jointree */
 	jtnode = pull_up_sublinks_jointree_recurse(root,
 											   (Node *) root->parse->jointree,
-											   &relids);
+											   &relids, NULL);
 
 	/*
 	 * root->parse->jointree must always be a FromExpr, so insert a dummy one
@@ -313,7 +315,7 @@ pull_up_sublinks(PlannerInfo *root)
  */
 static Node *
 pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
-								  Relids *relids)
+								  Relids *relids, JoinExpr *lowest_outer_join)
 {
 	if (jtnode == NULL)
 	{
@@ -343,7 +345,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
 
 			newchild = pull_up_sublinks_jointree_recurse(root,
 														 lfirst(l),
-														 &childrelids);
+														 &childrelids,
+														 lowest_outer_join);
 			newfromlist = lappend(newfromlist, newchild);
 			frelids = bms_join(frelids, childrelids);
 		}
@@ -354,7 +357,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
 		/* Now process qual --- all children are available for use */
 		newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
 													&jtlink, frelids,
-													NULL, NULL);
+													NULL, NULL,
+													lowest_outer_join);
 
 		/*
 		 * Note that the result will be either newf, or a stack of JoinExprs
@@ -385,9 +389,11 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
 
 		/* Recurse to process children and collect their relids */
 		j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
-													&leftrelids);
+													&leftrelids,
+													lowest_outer_join);
 		j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
-													&rightrelids);
+													&rightrelids,
+													lowest_outer_join);
 
 		/*
 		 * Now process qual, showing appropriate child relids as available,
@@ -408,13 +414,14 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
 														 &jtlink,
 														 bms_union(leftrelids,
 																   rightrelids),
-														 NULL, NULL);
+														 NULL, NULL,
+														 lowest_outer_join);
 				break;
 			case JOIN_LEFT:
 				j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
 														 &j->rarg,
 														 rightrelids,
-														 NULL, NULL);
+														 NULL, NULL, j);
 				break;
 			case JOIN_FULL:
 				/* can't do anything with full-join quals */
@@ -423,7 +430,7 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
 				j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
 														 &j->larg,
 														 leftrelids,
-														 NULL, NULL);
+														 NULL, NULL, j);
 				break;
 			default:
 				elog(ERROR, "unrecognized join type: %d",
@@ -468,7 +475,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
 static Node *
 pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 							  Node **jtlink1, Relids available_rels1,
-							  Node **jtlink2, Relids available_rels2)
+							  Node **jtlink2, Relids available_rels2,
+							  JoinExpr *lowest_outer_join)
 {
 	if (node == NULL)
 		return NULL;
@@ -482,7 +490,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 		if (sublink->subLinkType == ANY_SUBLINK)
 		{
 			if ((j = convert_ANY_sublink_to_join(root, sublink,
-												 available_rels1)) != NULL)
+												 available_rels1,
+												 lowest_outer_join)) != NULL)
 			{
 				/* Yes; insert the new join node into the join tree */
 				j->larg = *jtlink1;
@@ -490,7 +499,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 				/* Recursively process pulled-up jointree nodes */
 				j->rarg = pull_up_sublinks_jointree_recurse(root,
 															j->rarg,
-															&child_rels);
+															&child_rels, j);
 
 				/*
 				 * Now recursively process the pulled-up quals.  Any inserted
@@ -502,13 +511,15 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 														 &j->larg,
 														 available_rels1,
 														 &j->rarg,
-														 child_rels);
+														 child_rels,
+														 j);
 				/* Return NULL representing constant TRUE */
 				return NULL;
 			}
 			if (available_rels2 != NULL &&
 				(j = convert_ANY_sublink_to_join(root, sublink,
-												 available_rels2)) != NULL)
+												 available_rels2,
+												 lowest_outer_join)) != NULL)
 			{
 				/* Yes; insert the new join node into the join tree */
 				j->larg = *jtlink2;
@@ -516,7 +527,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 				/* Recursively process pulled-up jointree nodes */
 				j->rarg = pull_up_sublinks_jointree_recurse(root,
 															j->rarg,
-															&child_rels);
+															&child_rels, j);
 
 				/*
 				 * Now recursively process the pulled-up quals.  Any inserted
@@ -528,7 +539,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 														 &j->larg,
 														 available_rels2,
 														 &j->rarg,
-														 child_rels);
+														 child_rels,
+														 j);
 				/* Return NULL representing constant TRUE */
 				return NULL;
 			}
@@ -544,7 +556,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 				/* Recursively process pulled-up jointree nodes */
 				j->rarg = pull_up_sublinks_jointree_recurse(root,
 															j->rarg,
-															&child_rels);
+															&child_rels, j);
 
 				/*
 				 * Now recursively process the pulled-up quals.  Any inserted
@@ -556,7 +568,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 														 &j->larg,
 														 available_rels1,
 														 &j->rarg,
-														 child_rels);
+														 child_rels,
+														 j);
 				/* Return NULL representing constant TRUE */
 				return NULL;
 			}
@@ -570,7 +583,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 				/* Recursively process pulled-up jointree nodes */
 				j->rarg = pull_up_sublinks_jointree_recurse(root,
 															j->rarg,
-															&child_rels);
+															&child_rels, j);
 
 				/*
 				 * Now recursively process the pulled-up quals.  Any inserted
@@ -582,7 +595,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 														 &j->larg,
 														 available_rels2,
 														 &j->rarg,
-														 child_rels);
+														 child_rels,
+														 j);
 				/* Return NULL representing constant TRUE */
 				return NULL;
 			}
@@ -610,7 +624,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 					/* Recursively process pulled-up jointree nodes */
 					j->rarg = pull_up_sublinks_jointree_recurse(root,
 																j->rarg,
-																&child_rels);
+																&child_rels, j);
 
 					/*
 					 * Now recursively process the pulled-up quals.  Because
@@ -622,7 +636,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 															 j->quals,
 															 &j->rarg,
 															 child_rels,
-															 NULL, NULL);
+															 NULL, NULL,
+															 j);
 					/* Return NULL representing constant TRUE */
 					return NULL;
 				}
@@ -636,7 +651,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 					/* Recursively process pulled-up jointree nodes */
 					j->rarg = pull_up_sublinks_jointree_recurse(root,
 																j->rarg,
-																&child_rels);
+																&child_rels, j);
 
 					/*
 					 * Now recursively process the pulled-up quals.  Because
@@ -648,7 +663,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 															 j->quals,
 															 &j->rarg,
 															 child_rels,
-															 NULL, NULL);
+															 NULL, NULL,
+															 j);
 					/* Return NULL representing constant TRUE */
 					return NULL;
 				}
@@ -673,7 +689,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 													  jtlink1,
 													  available_rels1,
 													  jtlink2,
-													  available_rels2);
+													  available_rels2,
+													  lowest_outer_join);
 			if (newclause)
 				newclauses = lappend(newclauses, newclause);
 		}
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 784a1af82d..5b7aee121f 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -98,7 +98,7 @@ tlist_member(Expr *node, List *targetlist)
  * This is needed in some cases where we can't be sure of an exact typmod
  * match.  For safety, though, we insist on vartype match.
  */
-static TargetEntry *
+TargetEntry *
 tlist_member_match_var(Var *var, List *targetlist)
 {
 	ListCell   *temp;
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index 7db86c39ef..54441e692b 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -461,6 +461,14 @@ contain_vars_of_level_walker(Node *node, int *sublevels_up)
 			return true;		/* abort the tree traversal and return true */
 		/* else fall through to check the contained expr */
 	}
+	if (IsA(node, RangeTblEntry))
+	{
+		RangeTblEntry *rte = (RangeTblEntry *) node;
+
+		/* Someone can call the routine on a field of Query struct */
+		return range_table_entry_walker(rte, contain_vars_of_level_walker,
+										(void *) sublevels_up, 0);
+	}
 	if (IsA(node, Query))
 	{
 		/* Recurse into subselects */
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 7ff653b517..762e504ec3 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -765,6 +765,16 @@ StaticAssertDecl(lengthof(config_type_names) == (PGC_ENUM + 1),
 
 struct config_bool ConfigureNamesBool[] =
 {
+	{
+		{"optimize_correlated_subqueries", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("optimize_correlated_subqueries."),
+			NULL,
+			GUC_EXPLAIN | GUC_NOT_IN_SAMPLE
+		},
+		&optimize_correlated_subqueries,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
 			gettext_noop("Enables the planner's use of sequential-scan plans."),
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 409005bae9..cdf3fdce1a 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -88,6 +88,7 @@ extern PGDLLIMPORT double parallel_tuple_cost;
 extern PGDLLIMPORT double parallel_setup_cost;
 extern PGDLLIMPORT double recursive_worktable_factor;
 extern PGDLLIMPORT int effective_cache_size;
+extern PGDLLIMPORT bool optimize_correlated_subqueries;
 
 extern double clamp_row_est(double nrows);
 extern long clamp_cardinality_to_long(Cardinality x);
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 456d3076e0..4e126e45ee 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -19,7 +19,8 @@
 extern void SS_process_ctes(PlannerInfo *root);
 extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root,
 											 SubLink *sublink,
-											 Relids available_rels);
+											 Relids available_rels,
+											 JoinExpr *lowest_outer_join);
 extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root,
 												SubLink *sublink,
 												bool under_not,
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index 04668ba1c0..7627e7f679 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -18,6 +18,7 @@
 
 
 extern TargetEntry *tlist_member(Expr *node, List *targetlist);
+extern TargetEntry *tlist_member_match_var(Var *var, List *targetlist);
 
 extern List *add_to_flat_tlist(List *tlist, List *exprs);
 
diff --git a/src/test/regress/expected/prepare.out b/src/test/regress/expected/prepare.out
index 5815e17b39..749b3faf64 100644
--- a/src/test/regress/expected/prepare.out
+++ b/src/test/regress/expected/prepare.out
@@ -184,6 +184,24 @@ SELECT name, statement, parameter_types, result_types FROM pg_prepared_statement
       |     UPDATE tenk1 SET stringu1 = $2 WHERE unique1 = $1;           |                                                    | 
 (6 rows)
 
+-- The optimization on unnesting of correlated subqueries should work
+PREPARE q9(name,int) AS
+    SELECT * FROM tenk1 upper WHERE unique1 IN (
+        SELECT sub.unique2 FROM tenk1 sub
+        WHERE sub.stringu1 = $1 AND sub.unique1 = upper.unique2 + $2);
+EXPLAIN (COSTS OFF) EXECUTE q9('abc',2);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Nested Loop
+   ->  HashAggregate
+         Group Key: sub.unique2, sub.unique1
+         ->  Seq Scan on tenk1 sub
+               Filter: (stringu1 = 'abc'::name)
+   ->  Index Scan using tenk1_unique1 on tenk1 upper
+         Index Cond: (unique1 = sub.unique2)
+         Filter: (sub.unique1 = (unique2 + 2))
+(8 rows)
+
 -- test DEALLOCATE ALL;
 DEALLOCATE ALL;
 SELECT name, statement, parameter_types FROM pg_prepared_statements
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 63d26d44fc..1523770984 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -164,6 +164,35 @@ SELECT f1 AS "Correlated Field", f2 AS "Second Field"
                 3 |            3
 (6 rows)
 
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+                                             QUERY PLAN                                             
+----------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((upper.f1 = subselect_tbl.f2) AND ((upper.f2)::double precision = subselect_tbl.f3))
+   ->  Seq Scan on subselect_tbl upper
+   ->  Hash
+         ->  HashAggregate
+               Group Key: subselect_tbl.f2, subselect_tbl.f3
+               ->  Seq Scan on subselect_tbl
+(7 rows)
+
+-- Still doesn't work for NOT IN
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 NOT IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Seq Scan on subselect_tbl upper
+   Filter: (NOT (SubPlan 1))
+   SubPlan 1
+     ->  Seq Scan on subselect_tbl
+           Filter: ((upper.f2)::double precision = f3)
+(5 rows)
+
 SELECT f1 AS "Correlated Field", f3 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f1 IN
@@ -177,6 +206,415 @@ SELECT f1 AS "Correlated Field", f3 AS "Second Field"
                 3 |            3
 (5 rows)
 
+-- Constraints, imposed by LATERAL references, prohibit flattening of underlying
+-- Sublink.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   ->  Nested Loop Semi Join
+         Join Filter: (SubPlan 1)
+         ->  Seq Scan on subselect_tbl a
+         ->  Materialize
+               ->  Seq Scan on subselect_tbl b
+         SubPlan 1
+           ->  Seq Scan on subselect_tbl c
+                 Filter: (f1 = a.f2)
+(9 rows)
+
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+ count 
+-------
+     5
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE NOT EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+                  QUERY PLAN                   
+-----------------------------------------------
+ Aggregate
+   ->  Nested Loop Anti Join
+         Join Filter: (SubPlan 1)
+         ->  Seq Scan on subselect_tbl a
+         ->  Materialize
+               ->  Seq Scan on subselect_tbl b
+         SubPlan 1
+           ->  Seq Scan on subselect_tbl c
+                 Filter: (f1 = a.f2)
+(9 rows)
+
+-- Prohibit to unnest subquery - quals contain lateral references to rels
+-- outside a higher outer join.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+  SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Aggregate
+   ->  Nested Loop Left Join
+         Join Filter: (SubPlan 1)
+         ->  Seq Scan on subselect_tbl a
+         ->  Materialize
+               ->  Seq Scan on subselect_tbl b
+         SubPlan 1
+           ->  Seq Scan on subselect_tbl c
+                 Filter: (f3 = (a.f2)::double precision)
+(9 rows)
+
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+  SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+ count 
+-------
+    18
+(1 row)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN
+    (SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f1)
+; -- Optimizer removes excess clause
+                  QUERY PLAN                   
+-----------------------------------------------
+ Hash Join
+   Hash Cond: (a.f1 = b.f2)
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.f2, b.f2
+               ->  Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2*b.f1/b.f3+2 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- a bit more complex targetlist expression shouldn't cut off the optimization
+                                                                       QUERY PLAN                                                                       
+--------------------------------------------------------------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (((a.f1)::double precision = ((((b.f2 * b.f1))::double precision / b.f3) + '2'::double precision)) AND ((a.f2)::double precision = b.f3))
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: ((((b.f2 * b.f1))::double precision / b.f3) + '2'::double precision), b.f3
+               ->  Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- Two variables in a target list
+                                                QUERY PLAN                                                
+----------------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.f1 = b.f2) AND (a.f3 = (b.f1)::double precision) AND ((a.f2)::double precision = b.f3))
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.f2, (b.f1)::double precision, b.f3
+               ->  Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1*2 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- Expression as an element of composite type shouldn't cut off the optimization
+                                                   QUERY PLAN                                                   
+----------------------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.f1 = b.f2) AND (a.f3 = ((b.f1 * 2))::double precision) AND ((a.f2)::double precision = b.f3))
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.f2, ((b.f1 * 2))::double precision, b.f3
+               ->  Seq Scan on subselect_tbl b
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f1 = b.f3 AND b.f3 <> 12)
+; -- Two expressions with correlated variables
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((b.f2 = a.f1) AND (b.f3 = (a.f2)::double precision))
+   ->  HashAggregate
+         Group Key: b.f2, b.f3, b.f3
+         ->  Seq Scan on subselect_tbl b
+               Filter: (f3 <> '12'::double precision)
+   ->  Hash
+         ->  Seq Scan on subselect_tbl a
+               Filter: ((f2)::double precision = (f1)::double precision)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f2 = b.f3 AND b.f1 < 12)
+; -- Two expressions with correlated variables relates on one upper variable.
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.f1 = b.f2) AND ((a.f2)::double precision = b.f3))
+   ->  Seq Scan on subselect_tbl a
+   ->  Hash
+         ->  HashAggregate
+               Group Key: b.f2, b.f3, b.f3
+               ->  Seq Scan on subselect_tbl b
+                     Filter: (f1 < 12)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2)
+  )
+; -- Pull clauses without unnesting the query. XXX: It reduces performance in most use cases, doesn't it?
+               QUERY PLAN                
+-----------------------------------------
+ Hash Join
+   Hash Cond: (b.f2 = a.f1)
+   ->  HashAggregate
+         Group Key: b.f2
+         ->  Seq Scan on subselect_tbl b
+               Filter: (f2 < 12)
+   ->  Hash
+         ->  Seq Scan on subselect_tbl a
+               Filter: (f1 = f2)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM (
+      SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,10) x
+    ) b WHERE b.f2 = a.f2 AND b.f1 BETWEEN 12 AND 14
+      UNION ALL
+    SELECT c.f1 FROM subselect_tbl c
+    WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+  )
+; -- Disallow flattening of union all
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Append
+           ->  Function Scan on generate_series x
+                 Filter: ((x >= 12) AND (x <= 14) AND ((x + 1) = a.f2))
+           ->  Seq Scan on subselect_tbl c
+                 Filter: ((f1 >= 12) AND (f1 <= 14) AND (f2 = a.f2))
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f1 FROM subselect_tbl b JOIN subselect_tbl c ON (b.f1 = c.f2)
+    WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+  )
+; -- XXX: Could we flatten such subquery?
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop
+           ->  Seq Scan on subselect_tbl c
+                 Filter: ((f1 >= 12) AND (f1 <= 14) AND (f2 = a.f2))
+           ->  Seq Scan on subselect_tbl b
+                 Filter: (f1 = a.f2)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f1 FROM subselect_tbl b, subselect_tbl c
+    WHERE b.f1 = c.f2 AND c.f2 = a.f2 AND c.f1 IS NOT NULL
+  )
+; -- TODO: Could we flatten such subquery?
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Nested Loop
+           ->  Seq Scan on subselect_tbl b
+                 Filter: (f1 = a.f2)
+           ->  Materialize
+                 ->  Seq Scan on subselect_tbl c
+                       Filter: ((f1 IS NOT NULL) AND (f2 = a.f2))
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE (a.f1,f2) IN (
+    SELECT b.f2, avg(f3) FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2)
+  )
+; -- Doesn't support unnesting with aggregate functions
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  GroupAggregate
+           Group Key: b.f2
+           ->  Seq Scan on subselect_tbl b
+                 Filter: ((f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    WITH cte AS (
+      SELECT * FROM subselect_tbl c WHERE f1 < 42 AND f2 = a.f1
+    )
+    SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Give up optimization if CTE in subquery contains links to upper relation.
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (a.f1 = a.f2)
+           ->  Seq Scan on subselect_tbl c
+                 Filter: ((f1 < 42) AND (f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    WITH cte AS (
+      SELECT * FROM subselect_tbl c WHERE f1 < 42
+    )
+    SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Correlated subquery with trivial CTE can be pulled up
+                   QUERY PLAN                    
+-------------------------------------------------
+ Hash Semi Join
+   Hash Cond: (a.f1 = c.f2)
+   ->  Seq Scan on subselect_tbl a
+         Filter: (f1 = f2)
+   ->  Hash
+         ->  Seq Scan on subselect_tbl c
+               Filter: ((f1 < 42) AND (f2 < 12))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE (a.f1,a.f3) IN (
+    SELECT b.f2, avg(b.f3) OVER (PARTITION BY b.f2)
+    FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Doesn't support unnesting with window functions in target list
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  WindowAgg
+           ->  Seq Scan on subselect_tbl b
+                 Filter: ((f2 < 12) AND (f2 = a.f2))
+(6 rows)
+
+-- A having qual, group clause and so on, with links to upper relation variable
+-- cut off the optimization because another case we must rewrite the subquery
+-- as a lateral TargetEntry and arrange these links.
+-- But now, machinery of convert_ANY_sublink_to_join() isn't prepared for such
+-- complex work and it would induce additional complex code.
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2
+    FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2) HAVING b.f2 > a.f3
+  )
+;
+                                       QUERY PLAN                                        
+-----------------------------------------------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Group
+           Group Key: b.f2
+           ->  Seq Scan on subselect_tbl b
+                 Filter: ((f2 < 12) AND (f2 = a.f2) AND ((f2)::double precision > a.f3))
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM (
+      SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,a.f1) x
+    ) b WHERE b.f2 = a.f2 AND b.f1 < 12
+  )
+; -- Don't allow links to upper query in FROM section of subquery
+                    QUERY PLAN                     
+---------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Function Scan on generate_series x
+           Filter: ((x < 12) AND ((x + 1) = a.f2))
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT a.f1 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (a.f1)
+  )
+; -- GROUP BY contains link to upper relation
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Seq Scan on subselect_tbl a
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  Group
+           Group Key: a.f1
+           ->  Seq Scan on subselect_tbl b
+                 Filter: ((f2 < 12) AND (f2 = a.f2))
+(7 rows)
+
+-- Flatten subquery with not-correlated clauses. The same result set returned
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 42);
+                                             QUERY PLAN                                             
+----------------------------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((upper.f1 = subselect_tbl.f2) AND ((upper.f2)::double precision = subselect_tbl.f3))
+   ->  Seq Scan on subselect_tbl upper
+   ->  Hash
+         ->  HashAggregate
+               Group Key: subselect_tbl.f2, subselect_tbl.f3
+               ->  Seq Scan on subselect_tbl
+                     Filter: (f2 <> 42)
+(8 rows)
+
+SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 42);
+ Correlated Field | Second Field 
+------------------+--------------
+                2 |            4
+                3 |            5
+                1 |            1
+                2 |            2
+                3 |            3
+(5 rows)
+
 SELECT f1 AS "Correlated Field", f3 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
diff --git a/src/test/regress/sql/prepare.sql b/src/test/regress/sql/prepare.sql
index c6098dc95c..8a8164ee99 100644
--- a/src/test/regress/sql/prepare.sql
+++ b/src/test/regress/sql/prepare.sql
@@ -78,6 +78,13 @@ PREPARE q8 AS
 SELECT name, statement, parameter_types, result_types FROM pg_prepared_statements
     ORDER BY name;
 
+-- The optimization on unnesting of correlated subqueries should work
+PREPARE q9(name,int) AS
+    SELECT * FROM tenk1 upper WHERE unique1 IN (
+        SELECT sub.unique2 FROM tenk1 sub
+        WHERE sub.stringu1 = $1 AND sub.unique1 = upper.unique2 + $2);
+EXPLAIN (COSTS OFF) EXECUTE q9('abc',2);
+
 -- test DEALLOCATE ALL;
 DEALLOCATE ALL;
 SELECT name, statement, parameter_types FROM pg_prepared_statements
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 40276708c9..40ed61d508 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -67,11 +67,173 @@ SELECT f1 AS "Correlated Field", f2 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1);
 
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+-- Still doesn't work for NOT IN
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 NOT IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
+
 SELECT f1 AS "Correlated Field", f3 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f1 IN
     (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3);
 
+-- Constraints, imposed by LATERAL references, prohibit flattening of underlying
+-- Sublink.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a
+  WHERE NOT EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN (
+      SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2));
+
+-- Prohibit to unnest subquery - quals contain lateral references to rels
+-- outside a higher outer join.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+  SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN (
+  SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2);
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN
+    (SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f1)
+; -- Optimizer removes excess clause
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2*b.f1/b.f3+2 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- a bit more complex targetlist expression shouldn't cut off the optimization
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- Two variables in a target list
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1*2 FROM subselect_tbl b WHERE b.f3 = a.f2)
+; -- Expression as an element of composite type shouldn't cut off the optimization
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f1 = b.f3 AND b.f3 <> 12)
+; -- Two expressions with correlated variables
+EXPLAIN (COSTS OFF)
+SELECT * FROM subselect_tbl a
+WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f2 = b.f3 AND b.f1 < 12)
+; -- Two expressions with correlated variables relates on one upper variable.
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2)
+  )
+; -- Pull clauses without unnesting the query. XXX: It reduces performance in most use cases, doesn't it?
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM (
+      SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,10) x
+    ) b WHERE b.f2 = a.f2 AND b.f1 BETWEEN 12 AND 14
+      UNION ALL
+    SELECT c.f1 FROM subselect_tbl c
+    WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+  )
+; -- Disallow flattening of union all
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f1 FROM subselect_tbl b JOIN subselect_tbl c ON (b.f1 = c.f2)
+    WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14
+  )
+; -- XXX: Could we flatten such subquery?
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f1 FROM subselect_tbl b, subselect_tbl c
+    WHERE b.f1 = c.f2 AND c.f2 = a.f2 AND c.f1 IS NOT NULL
+  )
+; -- TODO: Could we flatten such subquery?
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE (a.f1,f2) IN (
+    SELECT b.f2, avg(f3) FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2)
+  )
+; -- Doesn't support unnesting with aggregate functions
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    WITH cte AS (
+      SELECT * FROM subselect_tbl c WHERE f1 < 42 AND f2 = a.f1
+    )
+    SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Give up optimization if CTE in subquery contains links to upper relation.
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    WITH cte AS (
+      SELECT * FROM subselect_tbl c WHERE f1 < 42
+    )
+    SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Correlated subquery with trivial CTE can be pulled up
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE (a.f1,a.f3) IN (
+    SELECT b.f2, avg(b.f3) OVER (PARTITION BY b.f2)
+    FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+  )
+; -- Doesn't support unnesting with window functions in target list
+
+-- A having qual, group clause and so on, with links to upper relation variable
+-- cut off the optimization because another case we must rewrite the subquery
+-- as a lateral TargetEntry and arrange these links.
+-- But now, machinery of convert_ANY_sublink_to_join() isn't prepared for such
+-- complex work and it would induce additional complex code.
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2
+    FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (b.f2) HAVING b.f2 > a.f3
+  )
+;
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT b.f2 FROM (
+      SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,a.f1) x
+    ) b WHERE b.f2 = a.f2 AND b.f1 < 12
+  )
+; -- Don't allow links to upper query in FROM section of subquery
+EXPLAIN (COSTS OFF)
+  SELECT * FROM subselect_tbl a
+  WHERE a.f1 IN (
+    SELECT a.f1 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12
+    GROUP BY (a.f1)
+  )
+; -- GROUP BY contains link to upper relation
+
+-- Flatten subquery with not-correlated clauses. The same result set returned
+EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 42);
+SELECT f1 AS "Correlated Field", f3 AS "Second Field"
+  FROM SUBSELECT_TBL upper
+  WHERE f1 IN
+    (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 42);
+
 SELECT f1 AS "Correlated Field", f3 AS "Second Field"
   FROM SUBSELECT_TBL upper
   WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
-- 
2.34.1

Reply via email to