From 52adab31e867a28f3930ff413c470ddbb510992e Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Mon, 17 Nov 2025 10:54:17 +0900
Subject: [PATCH v2 3/3] Tighten nullingrels checks for outer joins

When fixing up the targetlist and qpqual of an outer join, we must
account for the effects of the outer join.  Vars and PHVs appearing
there are logically above the join, so they should have nullingrels
equal to the input Vars/PHVs' nullingrels plus the bit added by the
outer join.

Determining the effects of the outer join can be tricky when the join
has been commuted with another one per outer join identity 3.  In this
case, the Vars/PHVs in the join's targetlist and qpqual should have
the same nullingrels that they would if the two joins had been done in
syntactic order.  Unfortunately, in setrefs.c, we don't have enough
information to identify what that should be, so we have to use
superset nullingrels matches instead of exact ones.

However, we can tighten the check somewhat.  Currently, we check
whether the jointype is JOIN_INNER and use NRM_SUPERSET if it is not.
We can improve this by checking whether the Join node has non-empty
ojrelids and using NRM_SUPERSET only in that case.  This allows us to
perform exact matches in more situations.

To support this, we record the outer-join relids in Join plan nodes.
This information can also improve EXPLAIN (RANGE_TABLE) output by
showing which outer-join relids are completed by each Join plan node.
We may discover additional uses for this information in the future.
---
 .../expected/pg_overexplain.out               | 40 ++++++++++++++++++-
 contrib/pg_overexplain/pg_overexplain.c       | 21 ++++++++++
 contrib/pg_overexplain/sql/pg_overexplain.sql | 14 ++++++-
 src/backend/optimizer/plan/createplan.c       | 39 ++++++++++++++++--
 src/backend/optimizer/plan/setrefs.c          | 18 ++++-----
 src/include/nodes/plannodes.h                 |  2 +
 6 files changed, 118 insertions(+), 16 deletions(-)

diff --git a/contrib/pg_overexplain/expected/pg_overexplain.out b/contrib/pg_overexplain/expected/pg_overexplain.out
index 55d34666d87..e8dfab1d9fc 100644
--- a/contrib/pg_overexplain/expected/pg_overexplain.out
+++ b/contrib/pg_overexplain/expected/pg_overexplain.out
@@ -377,14 +377,15 @@ $$);
 (15 rows)
 
 -- Create an index, and then attempt to force a nested loop with inner index
--- scan so that we can see parameter-related information. Also, let's try
--- actually running the query, but try to suppress potentially variable output.
+-- scan so that we can see parameter-related information.
 CREATE INDEX ON vegetables (id);
 ANALYZE vegetables;
 SET enable_hashjoin = false;
 SET enable_material = false;
 SET enable_mergejoin = false;
 SET enable_seqscan = false;
+-- Let's try actually running the query, but try to suppress potentially
+-- variable output.
 SELECT explain_filter($$
 EXPLAIN (BUFFERS OFF, COSTS OFF, SUMMARY OFF, TIMING OFF, ANALYZE, DEBUG)
 SELECT * FROM vegetables v1, vegetables v2 WHERE v1.id = v2.id;
@@ -440,6 +441,41 @@ $$);
    Parse Location: 0 to end
 (47 rows)
 
+-- Test the RANGE_TABLE option with a case that involves an outer join.
+SELECT explain_filter($$
+EXPLAIN (RANGE_TABLE, COSTS OFF)
+SELECT * FROM daucus d LEFT JOIN brassica b ON d.id = b.id;
+$$);
+                     explain_filter                      
+---------------------------------------------------------
+ Nested Loop Left Join
+   Outer Join RTIs: 3
+   ->  Index Scan using daucus_id_idx on daucus d
+         Scan RTI: 1
+   ->  Index Scan using brassica_id_idx on brassica b
+         Index Cond: (id = d.id)
+         Scan RTI: 2
+ RTI 1 (relation, in-from-clause):
+   Alias: d ()
+   Eref: d (id, name, genus)
+   Relation: daucus
+   Relation Kind: relation
+   Relation Lock Mode: AccessShareLock
+   Permission Info Index: 1
+ RTI 2 (relation, in-from-clause):
+   Alias: b ()
+   Eref: b (id, name, genus)
+   Relation: brassica
+   Relation Kind: relation
+   Relation Lock Mode: AccessShareLock
+   Permission Info Index: 2
+ RTI 3 (join, in-from-clause):
+   Eref: unnamed_join (id, name, genus, id, name, genus)
+   Join Type: Left
+ Unprunable RTIs: 1 2
+(25 rows)
+
+-- Restore default settings.
 RESET enable_hashjoin;
 RESET enable_material;
 RESET enable_mergejoin;
diff --git a/contrib/pg_overexplain/pg_overexplain.c b/contrib/pg_overexplain/pg_overexplain.c
index bd70b6d9d5e..92cfd8af2eb 100644
--- a/contrib/pg_overexplain/pg_overexplain.c
+++ b/contrib/pg_overexplain/pg_overexplain.c
@@ -248,6 +248,27 @@ overexplain_per_node_hook(PlanState *planstate, List *ancestors,
 					overexplain_bitmapset("RTIs",
 										  ((Result *) plan)->relids,
 										  es);
+				break;
+
+			case T_MergeJoin:
+			case T_NestLoop:
+			case T_HashJoin:
+				{
+					Join	   *join = (Join *) plan;
+
+					/*
+					 * 'ojrelids' is only meaningful for non-inner joins, but
+					 * if it somehow ends up set for an inner join, print it
+					 * anyway.
+					 */
+					if (join->jointype != JOIN_INNER ||
+						join->ojrelids != NULL)
+						overexplain_bitmapset("Outer Join RTIs",
+											  join->ojrelids,
+											  es);
+					break;
+				}
+
 			default:
 				break;
 		}
diff --git a/contrib/pg_overexplain/sql/pg_overexplain.sql b/contrib/pg_overexplain/sql/pg_overexplain.sql
index 42e275ac2f9..351b69757cf 100644
--- a/contrib/pg_overexplain/sql/pg_overexplain.sql
+++ b/contrib/pg_overexplain/sql/pg_overexplain.sql
@@ -86,18 +86,28 @@ INSERT INTO vegetables (name, genus)
 $$);
 
 -- Create an index, and then attempt to force a nested loop with inner index
--- scan so that we can see parameter-related information. Also, let's try
--- actually running the query, but try to suppress potentially variable output.
+-- scan so that we can see parameter-related information.
 CREATE INDEX ON vegetables (id);
 ANALYZE vegetables;
 SET enable_hashjoin = false;
 SET enable_material = false;
 SET enable_mergejoin = false;
 SET enable_seqscan = false;
+
+-- Let's try actually running the query, but try to suppress potentially
+-- variable output.
 SELECT explain_filter($$
 EXPLAIN (BUFFERS OFF, COSTS OFF, SUMMARY OFF, TIMING OFF, ANALYZE, DEBUG)
 SELECT * FROM vegetables v1, vegetables v2 WHERE v1.id = v2.id;
 $$);
+
+-- Test the RANGE_TABLE option with a case that involves an outer join.
+SELECT explain_filter($$
+EXPLAIN (RANGE_TABLE, COSTS OFF)
+SELECT * FROM daucus d LEFT JOIN brassica b ON d.id = b.id;
+$$);
+
+-- Restore default settings.
 RESET enable_hashjoin;
 RESET enable_material;
 RESET enable_mergejoin;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8af091ba647..e516a7ce82b 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -232,14 +232,18 @@ static BitmapOr *make_bitmap_or(List *bitmapplans);
 static NestLoop *make_nestloop(List *tlist,
 							   List *joinclauses, List *otherclauses, List *nestParams,
 							   Plan *lefttree, Plan *righttree,
-							   JoinType jointype, bool inner_unique);
+							   JoinType jointype,
+							   Relids ojrelids,
+							   bool inner_unique);
 static HashJoin *make_hashjoin(List *tlist,
 							   List *joinclauses, List *otherclauses,
 							   List *hashclauses,
 							   List *hashoperators, List *hashcollations,
 							   List *hashkeys,
 							   Plan *lefttree, Plan *righttree,
-							   JoinType jointype, bool inner_unique);
+							   JoinType jointype,
+							   Relids ojrelids,
+							   bool inner_unique);
 static Hash *make_hash(Plan *lefttree,
 					   List *hashkeys,
 					   Oid skewTable,
@@ -253,7 +257,9 @@ static MergeJoin *make_mergejoin(List *tlist,
 								 bool *mergereversals,
 								 bool *mergenullsfirst,
 								 Plan *lefttree, Plan *righttree,
-								 JoinType jointype, bool inner_unique,
+								 JoinType jointype,
+								 Relids ojrelids,
+								 bool inner_unique,
 								 bool skip_mark_restore);
 static Sort *make_sort(Plan *lefttree, int numCols,
 					   AttrNumber *sortColIdx, Oid *sortOperators,
@@ -4189,6 +4195,7 @@ create_nestloop_plan(PlannerInfo *root,
 	Plan	   *outer_plan;
 	Plan	   *inner_plan;
 	Relids		outerrelids;
+	Relids		ojrelids;
 	List	   *tlist = build_path_tlist(root, &best_path->jpath.path);
 	List	   *joinrestrictclauses = best_path->jpath.joinrestrictinfo;
 	List	   *joinclauses;
@@ -4255,6 +4262,11 @@ create_nestloop_plan(PlannerInfo *root,
 			replace_nestloop_params(root, (Node *) otherclauses);
 	}
 
+	/* Identify any outer joins computed at this level */
+	ojrelids = bms_difference(best_path->jpath.path.parent->relids,
+							  bms_union(best_path->jpath.outerjoinpath->parent->relids,
+										best_path->jpath.innerjoinpath->parent->relids));
+
 	/*
 	 * Identify any nestloop parameters that should be supplied by this join
 	 * node, and remove them from root->curOuterParams.
@@ -4326,6 +4338,7 @@ create_nestloop_plan(PlannerInfo *root,
 							  outer_plan,
 							  inner_plan,
 							  best_path->jpath.jointype,
+							  ojrelids,
 							  best_path->jpath.inner_unique);
 
 	copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
@@ -4340,6 +4353,7 @@ create_mergejoin_plan(PlannerInfo *root,
 	MergeJoin  *join_plan;
 	Plan	   *outer_plan;
 	Plan	   *inner_plan;
+	Relids		ojrelids;
 	List	   *tlist = build_path_tlist(root, &best_path->jpath.path);
 	List	   *joinclauses;
 	List	   *otherclauses;
@@ -4418,6 +4432,11 @@ create_mergejoin_plan(PlannerInfo *root,
 	mergeclauses = get_switched_clauses(best_path->path_mergeclauses,
 										best_path->jpath.outerjoinpath->parent->relids);
 
+	/* Identify any outer joins computed at this level */
+	ojrelids = bms_difference(best_path->jpath.path.parent->relids,
+							  bms_union(outer_path->parent->relids,
+										inner_path->parent->relids));
+
 	/*
 	 * Create explicit sort nodes for the outer and inner paths if necessary.
 	 */
@@ -4678,6 +4697,7 @@ create_mergejoin_plan(PlannerInfo *root,
 							   outer_plan,
 							   inner_plan,
 							   best_path->jpath.jointype,
+							   ojrelids,
 							   best_path->jpath.inner_unique,
 							   best_path->skip_mark_restore);
 
@@ -4695,6 +4715,7 @@ create_hashjoin_plan(PlannerInfo *root,
 	Hash	   *hash_plan;
 	Plan	   *outer_plan;
 	Plan	   *inner_plan;
+	Relids		ojrelids;
 	List	   *tlist = build_path_tlist(root, &best_path->jpath.path);
 	List	   *joinclauses;
 	List	   *otherclauses;
@@ -4843,6 +4864,11 @@ create_hashjoin_plan(PlannerInfo *root,
 		hash_plan->rows_total = best_path->inner_rows_total;
 	}
 
+	/* Identify any outer joins computed at this level */
+	ojrelids = bms_difference(best_path->jpath.path.parent->relids,
+							  bms_union(best_path->jpath.outerjoinpath->parent->relids,
+										best_path->jpath.innerjoinpath->parent->relids));
+
 	join_plan = make_hashjoin(tlist,
 							  joinclauses,
 							  otherclauses,
@@ -4853,6 +4879,7 @@ create_hashjoin_plan(PlannerInfo *root,
 							  outer_plan,
 							  (Plan *) hash_plan,
 							  best_path->jpath.jointype,
+							  ojrelids,
 							  best_path->jpath.inner_unique);
 
 	copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path);
@@ -5925,6 +5952,7 @@ make_nestloop(List *tlist,
 			  Plan *lefttree,
 			  Plan *righttree,
 			  JoinType jointype,
+			  Relids ojrelids,
 			  bool inner_unique)
 {
 	NestLoop   *node = makeNode(NestLoop);
@@ -5937,6 +5965,7 @@ make_nestloop(List *tlist,
 	node->join.jointype = jointype;
 	node->join.inner_unique = inner_unique;
 	node->join.joinqual = joinclauses;
+	node->join.ojrelids = ojrelids;
 	node->nestParams = nestParams;
 
 	return node;
@@ -5953,6 +5982,7 @@ make_hashjoin(List *tlist,
 			  Plan *lefttree,
 			  Plan *righttree,
 			  JoinType jointype,
+			  Relids ojrelids,
 			  bool inner_unique)
 {
 	HashJoin   *node = makeNode(HashJoin);
@@ -5969,6 +5999,7 @@ make_hashjoin(List *tlist,
 	node->join.jointype = jointype;
 	node->join.inner_unique = inner_unique;
 	node->join.joinqual = joinclauses;
+	node->join.ojrelids = ojrelids;
 
 	return node;
 }
@@ -6008,6 +6039,7 @@ make_mergejoin(List *tlist,
 			   Plan *lefttree,
 			   Plan *righttree,
 			   JoinType jointype,
+			   Relids ojrelids,
 			   bool inner_unique,
 			   bool skip_mark_restore)
 {
@@ -6027,6 +6059,7 @@ make_mergejoin(List *tlist,
 	node->join.jointype = jointype;
 	node->join.inner_unique = inner_unique;
 	node->join.joinqual = joinclauses;
+	node->join.ojrelids = ojrelids;
 
 	return node;
 }
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 95c5cd05339..51b028e702e 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -2432,13 +2432,13 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset)
 
 	/*
 	 * Now we need to fix up the targetlist and qpqual, which are logically
-	 * above the join.  This means that, if it's not an inner join, any Vars
-	 * and PHVs appearing here should have nullingrels that include the
-	 * effects of the outer join, ie they will have nullingrels equal to the
-	 * input Vars' nullingrels plus the bit added by the outer join.  We don't
-	 * currently have enough info available here to identify what that should
-	 * be, so we just tell fix_join_expr to accept superset nullingrels
-	 * matches instead of exact ones.
+	 * above the join.  This means that, if it's an outer join with non-empty
+	 * ojrelids, any Vars and PHVs appearing here should have nullingrels that
+	 * include the effects of the outer join, ie they will have nullingrels
+	 * equal to the input Vars' nullingrels plus the bit added by the outer
+	 * join.  We don't currently have enough info available here to identify
+	 * what that should be, so we just tell fix_join_expr to accept superset
+	 * nullingrels matches instead of exact ones.
 	 */
 	join->plan.targetlist = fix_join_expr(root,
 										  join->plan.targetlist,
@@ -2446,7 +2446,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset)
 										  inner_itlist,
 										  (Index) 0,
 										  rtoffset,
-										  (join->jointype == JOIN_INNER ? NRM_EQUAL : NRM_SUPERSET),
+										  (bms_is_empty(join->ojrelids) ? NRM_EQUAL : NRM_SUPERSET),
 										  NUM_EXEC_TLIST((Plan *) join));
 	join->plan.qual = fix_join_expr(root,
 									join->plan.qual,
@@ -2454,7 +2454,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset)
 									inner_itlist,
 									(Index) 0,
 									rtoffset,
-									(join->jointype == JOIN_INNER ? NRM_EQUAL : NRM_SUPERSET),
+									(bms_is_empty(join->ojrelids) ? NRM_EQUAL : NRM_SUPERSET),
 									NUM_EXEC_QUAL((Plan *) join));
 
 	pfree(outer_itlist);
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index c4393a94321..0b6e5144325 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -945,6 +945,7 @@ typedef struct CustomScan
  * inner_unique each outer tuple can match to no more than one inner tuple
  * joinqual:	qual conditions that came from JOIN/ON or JOIN/USING
  *				(plan.qual contains conditions that came from WHERE)
+ * ojrelids:    outer joins completed at this level
  *
  * When jointype is INNER, joinqual and plan.qual are semantically
  * interchangeable.  For OUTER jointypes, the two are *not* interchangeable;
@@ -969,6 +970,7 @@ typedef struct Join
 	bool		inner_unique;
 	/* JOIN quals (in addition to plan.qual) */
 	List	   *joinqual;
+	Bitmapset  *ojrelids;
 } Join;
 
 /* ----------------
-- 
2.39.5 (Apple Git-154)

