I wrote:
> The ideas I'd been toying with last night involved a pre-scan over
> the join tree to calculate the potential nullingrels of each leaf RTE
> (same idea as RelOptInfo.nulling_relids, but of course we don't have
> any RelOptInfos yet).  That seems painful though because we'd have to
> update the data structure somehow after each subquery pullup.

I realized that we can make that work by doing the pre-calculation
at the start of each pull_up_simple_subquery.  We only have to do
it for subqueries marked LATERAL, so this doesn't seem too horribly
inefficient.  Draft patch attached.  I didn't spend effort on
devising test cases, but it works for your example.

                        regards, tom lane

diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 2e0d41a8d1..ff9c1cd24e 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -42,6 +42,17 @@
 #include "rewrite/rewriteManip.h"
 
 
+typedef struct nullingrel_info
+{
+	/*
+	 * For each leaf RTE, nullingrels[rti] is the set of relids of outer joins
+	 * that potentially null that RTE.
+	 */
+	Relids	   *nullingrels;
+	/* Length of range table (maximum index in nullingrels[]) */
+	int			rtlength;		/* used only for assertion checks */
+} nullingrel_info;
+
 typedef struct pullup_replace_vars_context
 {
 	PlannerInfo *root;
@@ -49,6 +60,8 @@ typedef struct pullup_replace_vars_context
 	RangeTblEntry *target_rte;	/* RTE of subquery */
 	Relids		relids;			/* relids within subquery, as numbered after
 								 * pullup (set only if target_rte->lateral) */
+	nullingrel_info *nullinfo;	/* per-RTE nullingrel info (set only if
+								 * target_rte->lateral) */
 	bool	   *outer_hasSubLinks;	/* -> outer query's hasSubLinks */
 	int			varno;			/* varno of subquery */
 	bool		wrap_non_vars;	/* do we need all non-Var outputs to be PHVs? */
@@ -142,6 +155,9 @@ static void substitute_phv_relids(Node *node,
 static void fix_append_rel_relids(PlannerInfo *root, int varno,
 								  Relids subrelids);
 static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
+static nullingrel_info *get_nullingrels(Query *parse);
+static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels,
+									nullingrel_info *info);
 
 
 /*
@@ -1259,10 +1275,16 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	rvcontext.targetlist = subquery->targetList;
 	rvcontext.target_rte = rte;
 	if (rte->lateral)
+	{
 		rvcontext.relids = get_relids_in_jointree((Node *) subquery->jointree,
 												  true, true);
-	else						/* won't need relids */
+		rvcontext.nullinfo = get_nullingrels(parse);
+	}
+	else						/* won't need these fields */
+	{
 		rvcontext.relids = NULL;
+		rvcontext.nullinfo = NULL;
+	}
 	rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
 	rvcontext.varno = varno;
 	/* this flag will be set below, if needed */
@@ -1809,7 +1831,8 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
 	rvcontext.root = root;
 	rvcontext.targetlist = tlist;
 	rvcontext.target_rte = rte;
-	rvcontext.relids = NULL;
+	rvcontext.relids = NULL;	/* can't be any lateral references here */
+	rvcontext.nullinfo = NULL;
 	rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
 	rvcontext.varno = varno;
 	rvcontext.wrap_non_vars = false;
@@ -1971,9 +1994,10 @@ pull_up_constant_function(PlannerInfo *root, Node *jtnode,
 	/*
 	 * Since this function was reduced to a Const, it doesn't contain any
 	 * lateral references, even if it's marked as LATERAL.  This means we
-	 * don't need to fill relids.
+	 * don't need to fill relids or nullinfo.
 	 */
 	rvcontext.relids = NULL;
+	rvcontext.nullinfo = NULL;
 
 	rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
 	rvcontext.varno = ((RangeTblRef *) jtnode)->rtindex;
@@ -2688,9 +2712,42 @@ pullup_replace_vars_callback(Var *var,
 		{
 			/*
 			 * There should be Vars/PHVs within the expression that we can
-			 * modify.  Per above discussion, modify only Vars/PHVs of the
-			 * subquery, not lateral references.
+			 * modify.  Vars/PHVs of the subquery should have the full
+			 * var->varnullingrels added to them, but if there are lateral
+			 * references within the expression, those must be marked with
+			 * only the nullingrels that potentially apply to them.  That
+			 * could be different for different lateral relations, so we have
+			 * to do this work for each one.
 			 */
+			if (rcon->target_rte->lateral)
+			{
+				nullingrel_info *nullinfo = rcon->nullinfo;
+				Relids		lvarnos;
+				int			lvarno;
+
+				/*
+				 * Identify lateral varnos used within newnode.  We must do
+				 * this before injecting var->varnullingrels into the tree.
+				 */
+				lvarnos = pull_varnos(rcon->root, newnode);
+				lvarnos = bms_del_members(lvarnos, rcon->relids);
+				/* For each one, add relevant nullingrels if any */
+				lvarno = -1;
+				while ((lvarno = bms_next_member(lvarnos, lvarno)) >= 0)
+				{
+					Relids		lnullingrels;
+
+					Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
+					lnullingrels = bms_intersect(var->varnullingrels,
+												 nullinfo->nullingrels[lvarno]);
+					if (!bms_is_empty(lnullingrels))
+						newnode = add_nulling_relids(newnode,
+													 bms_make_singleton(lvarno),
+													 lnullingrels);
+				}
+			}
+
+			/* Finally, deal with Vars/PHVs of the subquery itself */
 			newnode = add_nulling_relids(newnode,
 										 rcon->relids,
 										 var->varnullingrels);
@@ -4120,3 +4177,94 @@ find_jointree_node_for_rel(Node *jtnode, int relid)
 			 (int) nodeTag(jtnode));
 	return NULL;
 }
+
+/*
+ * get_nullingrels: collect info about which outer joins null which relations
+ *
+ * The result struct contains, for each leaf relation used in the query,
+ * the set of relids of outer joins that potentially null that rel.
+ */
+static nullingrel_info *
+get_nullingrels(Query *parse)
+{
+	nullingrel_info *result = palloc_object(nullingrel_info);
+
+	result->rtlength = list_length(parse->rtable);
+	result->nullingrels = palloc0_array(Relids, result->rtlength + 1);
+	get_nullingrels_recurse((Node *) parse->jointree, NULL, result);
+	return result;
+}
+
+/*
+ * Recursive guts of get_nullingrels().
+ *
+ * Note: at any recursion level, the passed-down upper_nullingrels must be
+ * treated as a constant, but it can be stored directly into *info
+ * if we're at leaf level.  Upper recursion levels do not free their mutated
+ * copies of the nullingrels, because those are probably referenced by
+ * at least one leaf rel.
+ */
+static void
+get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels,
+						nullingrel_info *info)
+{
+	if (jtnode == NULL)
+		return;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		int			varno = ((RangeTblRef *) jtnode)->rtindex;
+
+		Assert(varno > 0 && varno <= info->rtlength);
+		info->nullingrels[varno] = upper_nullingrels;
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		ListCell   *l;
+
+		foreach(l, f->fromlist)
+		{
+			get_nullingrels_recurse(lfirst(l), upper_nullingrels, info);
+		}
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+		Relids		local_nullingrels;
+
+		switch (j->jointype)
+		{
+			case JOIN_INNER:
+				get_nullingrels_recurse(j->larg, upper_nullingrels, info);
+				get_nullingrels_recurse(j->rarg, upper_nullingrels, info);
+				break;
+			case JOIN_LEFT:
+			case JOIN_SEMI:
+			case JOIN_ANTI:
+				local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+												   j->rtindex);
+				get_nullingrels_recurse(j->larg, upper_nullingrels, info);
+				get_nullingrels_recurse(j->rarg, local_nullingrels, info);
+				break;
+			case JOIN_FULL:
+				local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+												   j->rtindex);
+				get_nullingrels_recurse(j->larg, local_nullingrels, info);
+				get_nullingrels_recurse(j->rarg, local_nullingrels, info);
+				break;
+			case JOIN_RIGHT:
+				local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+												   j->rtindex);
+				get_nullingrels_recurse(j->larg, local_nullingrels, info);
+				get_nullingrels_recurse(j->rarg, upper_nullingrels, info);
+				break;
+			default:
+				elog(ERROR, "unrecognized join type: %d",
+					 (int) j->jointype);
+				break;
+		}
+	}
+	else
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(jtnode));
+}
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b54428b38c..2d4c870423 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3665,6 +3665,7 @@ nodeitem
 normal_rand_fctx
 nsphash_hash
 ntile_context
+nullingrel_info
 numeric
 object_access_hook_type
 object_access_hook_type_str

Reply via email to