I wrote:
> The ideas I'd been toying with last night involved a pre-scan over
> the join tree to calculate the potential nullingrels of each leaf RTE
> (same idea as RelOptInfo.nulling_relids, but of course we don't have
> any RelOptInfos yet). That seems painful though because we'd have to
> update the data structure somehow after each subquery pullup.
I realized that we can make that work by doing the pre-calculation
at the start of each pull_up_simple_subquery. We only have to do
it for subqueries marked LATERAL, so this doesn't seem too horribly
inefficient. Draft patch attached. I didn't spend effort on
devising test cases, but it works for your example.
regards, tom lane
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 2e0d41a8d1..ff9c1cd24e 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -42,6 +42,17 @@
#include "rewrite/rewriteManip.h"
+typedef struct nullingrel_info
+{
+ /*
+ * For each leaf RTE, nullingrels[rti] is the set of relids of outer joins
+ * that potentially null that RTE.
+ */
+ Relids *nullingrels;
+ /* Length of range table (maximum index in nullingrels[]) */
+ int rtlength; /* used only for assertion checks */
+} nullingrel_info;
+
typedef struct pullup_replace_vars_context
{
PlannerInfo *root;
@@ -49,6 +60,8 @@ typedef struct pullup_replace_vars_context
RangeTblEntry *target_rte; /* RTE of subquery */
Relids relids; /* relids within subquery, as numbered after
* pullup (set only if target_rte->lateral) */
+ nullingrel_info *nullinfo; /* per-RTE nullingrel info (set only if
+ * target_rte->lateral) */
bool *outer_hasSubLinks; /* -> outer query's hasSubLinks */
int varno; /* varno of subquery */
bool wrap_non_vars; /* do we need all non-Var outputs to be PHVs? */
@@ -142,6 +155,9 @@ static void substitute_phv_relids(Node *node,
static void fix_append_rel_relids(PlannerInfo *root, int varno,
Relids subrelids);
static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
+static nullingrel_info *get_nullingrels(Query *parse);
+static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels,
+ nullingrel_info *info);
/*
@@ -1259,10 +1275,16 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
rvcontext.targetlist = subquery->targetList;
rvcontext.target_rte = rte;
if (rte->lateral)
+ {
rvcontext.relids = get_relids_in_jointree((Node *) subquery->jointree,
true, true);
- else /* won't need relids */
+ rvcontext.nullinfo = get_nullingrels(parse);
+ }
+ else /* won't need these fields */
+ {
rvcontext.relids = NULL;
+ rvcontext.nullinfo = NULL;
+ }
rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
rvcontext.varno = varno;
/* this flag will be set below, if needed */
@@ -1809,7 +1831,8 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
rvcontext.root = root;
rvcontext.targetlist = tlist;
rvcontext.target_rte = rte;
- rvcontext.relids = NULL;
+ rvcontext.relids = NULL; /* can't be any lateral references here */
+ rvcontext.nullinfo = NULL;
rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
rvcontext.varno = varno;
rvcontext.wrap_non_vars = false;
@@ -1971,9 +1994,10 @@ pull_up_constant_function(PlannerInfo *root, Node *jtnode,
/*
* Since this function was reduced to a Const, it doesn't contain any
* lateral references, even if it's marked as LATERAL. This means we
- * don't need to fill relids.
+ * don't need to fill relids or nullinfo.
*/
rvcontext.relids = NULL;
+ rvcontext.nullinfo = NULL;
rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
rvcontext.varno = ((RangeTblRef *) jtnode)->rtindex;
@@ -2688,9 +2712,42 @@ pullup_replace_vars_callback(Var *var,
{
/*
* There should be Vars/PHVs within the expression that we can
- * modify. Per above discussion, modify only Vars/PHVs of the
- * subquery, not lateral references.
+ * modify. Vars/PHVs of the subquery should have the full
+ * var->varnullingrels added to them, but if there are lateral
+ * references within the expression, those must be marked with
+ * only the nullingrels that potentially apply to them. That
+ * could be different for different lateral relations, so we have
+ * to do this work for each one.
*/
+ if (rcon->target_rte->lateral)
+ {
+ nullingrel_info *nullinfo = rcon->nullinfo;
+ Relids lvarnos;
+ int lvarno;
+
+ /*
+ * Identify lateral varnos used within newnode. We must do
+ * this before injecting var->varnullingrels into the tree.
+ */
+ lvarnos = pull_varnos(rcon->root, newnode);
+ lvarnos = bms_del_members(lvarnos, rcon->relids);
+ /* For each one, add relevant nullingrels if any */
+ lvarno = -1;
+ while ((lvarno = bms_next_member(lvarnos, lvarno)) >= 0)
+ {
+ Relids lnullingrels;
+
+ Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
+ lnullingrels = bms_intersect(var->varnullingrels,
+ nullinfo->nullingrels[lvarno]);
+ if (!bms_is_empty(lnullingrels))
+ newnode = add_nulling_relids(newnode,
+ bms_make_singleton(lvarno),
+ lnullingrels);
+ }
+ }
+
+ /* Finally, deal with Vars/PHVs of the subquery itself */
newnode = add_nulling_relids(newnode,
rcon->relids,
var->varnullingrels);
@@ -4120,3 +4177,94 @@ find_jointree_node_for_rel(Node *jtnode, int relid)
(int) nodeTag(jtnode));
return NULL;
}
+
+/*
+ * get_nullingrels: collect info about which outer joins null which relations
+ *
+ * The result struct contains, for each leaf relation used in the query,
+ * the set of relids of outer joins that potentially null that rel.
+ */
+static nullingrel_info *
+get_nullingrels(Query *parse)
+{
+ nullingrel_info *result = palloc_object(nullingrel_info);
+
+ result->rtlength = list_length(parse->rtable);
+ result->nullingrels = palloc0_array(Relids, result->rtlength + 1);
+ get_nullingrels_recurse((Node *) parse->jointree, NULL, result);
+ return result;
+}
+
+/*
+ * Recursive guts of get_nullingrels().
+ *
+ * Note: at any recursion level, the passed-down upper_nullingrels must be
+ * treated as a constant, but it can be stored directly into *info
+ * if we're at leaf level. Upper recursion levels do not free their mutated
+ * copies of the nullingrels, because those are probably referenced by
+ * at least one leaf rel.
+ */
+static void
+get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels,
+ nullingrel_info *info)
+{
+ if (jtnode == NULL)
+ return;
+ if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+
+ Assert(varno > 0 && varno <= info->rtlength);
+ info->nullingrels[varno] = upper_nullingrels;
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+
+ foreach(l, f->fromlist)
+ {
+ get_nullingrels_recurse(lfirst(l), upper_nullingrels, info);
+ }
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ Relids local_nullingrels;
+
+ switch (j->jointype)
+ {
+ case JOIN_INNER:
+ get_nullingrels_recurse(j->larg, upper_nullingrels, info);
+ get_nullingrels_recurse(j->rarg, upper_nullingrels, info);
+ break;
+ case JOIN_LEFT:
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+ local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+ j->rtindex);
+ get_nullingrels_recurse(j->larg, upper_nullingrels, info);
+ get_nullingrels_recurse(j->rarg, local_nullingrels, info);
+ break;
+ case JOIN_FULL:
+ local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+ j->rtindex);
+ get_nullingrels_recurse(j->larg, local_nullingrels, info);
+ get_nullingrels_recurse(j->rarg, local_nullingrels, info);
+ break;
+ case JOIN_RIGHT:
+ local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+ j->rtindex);
+ get_nullingrels_recurse(j->larg, local_nullingrels, info);
+ get_nullingrels_recurse(j->rarg, upper_nullingrels, info);
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) j->jointype);
+ break;
+ }
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+}
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b54428b38c..2d4c870423 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3665,6 +3665,7 @@ nodeitem
normal_rand_fctx
nsphash_hash
ntile_context
+nullingrel_info
numeric
object_access_hook_type
object_access_hook_type_str