From 2dc970a0d3b3c89be6c7610ca53341b94a06c18a Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Thu, 4 May 2023 11:37:52 +0800
Subject: [PATCH v2] Allow direct lookups of SpecialJoinInfo by ojrelid.

There are several places where we scan the whole join_info_list looking
for the SpecialJoinInfos whose ojrelids are members of a given relid
set.  Each such scan is O(n) in the length of join_info_list, which can
be expensive.  So introduce join_info_array, indexed by ojrelid, to
allow direct lookup of a SpecialJoinInfo.  This benefits the existing
functions clause_is_computable_at(), make_outerjoininfo(),
check_redundant_nullability_qual(), get_join_domain_min_rels() and
have_unsafe_outer_join_ref(), and possibly future callers as well.
---
 src/backend/optimizer/path/joinpath.c     | 27 +++++----
 src/backend/optimizer/plan/analyzejoins.c |  6 ++
 src/backend/optimizer/plan/initsplan.c    | 70 ++++++++++++++---------
 src/backend/optimizer/util/relnode.c      |  7 +++
 src/backend/optimizer/util/restrictinfo.c | 25 ++++----
 src/include/nodes/pathnodes.h             |  9 +++
 6 files changed, 92 insertions(+), 52 deletions(-)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index cd80e61fd7..40283483a1 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -390,30 +390,29 @@ have_unsafe_outer_join_ref(PlannerInfo *root,
 	bool		result = false;
 	Relids		unsatisfied = bms_difference(inner_paramrels, outerrelids);
 	Relids		satisfied = bms_intersect(inner_paramrels, outerrelids);
+	Relids		unsatisfied_ojrelids;
+	int			i;
 
-	if (bms_overlap(unsatisfied, root->outer_join_rels))
+	unsatisfied_ojrelids = bms_intersect(unsatisfied, root->outer_join_rels);
+
+	i = -1;
+	while ((i = bms_next_member(unsatisfied_ojrelids, i)) >= 0)
 	{
-		ListCell   *lc;
+		SpecialJoinInfo *sjinfo = root->join_info_array[i];
 
-		foreach(lc, root->join_info_list)
+		if (bms_overlap(satisfied, sjinfo->min_righthand) ||
+			(sjinfo->jointype == JOIN_FULL &&
+			 bms_overlap(satisfied, sjinfo->min_lefthand)))
 		{
-			SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
-
-			if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
-				continue;		/* not relevant */
-			if (bms_overlap(satisfied, sjinfo->min_righthand) ||
-				(sjinfo->jointype == JOIN_FULL &&
-				 bms_overlap(satisfied, sjinfo->min_lefthand)))
-			{
-				result = true;	/* doesn't work */
-				break;
-			}
+			result = true;	/* doesn't work */
+			break;
 		}
 	}
 
 	/* Waste no memory when we reject a path here */
 	bms_free(unsatisfied);
 	bms_free(satisfied);
+	bms_free(unsatisfied_ojrelids);
 
 	return result;
 }
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 98cf3494e6..b569239c99 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -107,6 +107,8 @@ restart:
 		 * immediately, we don't bother with foreach_delete_current.)
 		 */
 		root->join_info_list = list_delete_cell(root->join_info_list, lc);
+		if (sjinfo->ojrelid != 0)
+			root->join_info_array[sjinfo->ojrelid] = NULL;
 
 		/*
 		 * Restart the scan.  This is necessary to ensure we find all
@@ -681,6 +683,10 @@ reduce_unique_semijoins(PlannerInfo *root)
 
 		/* OK, remove the SpecialJoinInfo from the list. */
 		root->join_info_list = foreach_delete_current(root->join_info_list, lc);
+		/*
+		 * There is no need to update join_info_array here: semijoins have
+		 * zero ojrelid, so their SpecialJoinInfos are never stored there.
+		 */
 	}
 }
 
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 06f90882c4..87fefe6138 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -1232,7 +1232,12 @@ deconstruct_distribute(PlannerInfo *root, JoinTreeItem *jtitem)
 
 		/* And add the SpecialJoinInfo to join_info_list */
 		if (sjinfo)
+		{
 			root->join_info_list = lappend(root->join_info_list, sjinfo);
+
+			if (sjinfo->ojrelid != 0)
+				root->join_info_array[sjinfo->ojrelid] = sjinfo;
+		}
 	}
 	else
 	{
@@ -1661,18 +1666,24 @@ make_outerjoininfo(PlannerInfo *root,
 		commute_below = bms_union(commute_below_l, commute_below_r);
 		if (!bms_is_empty(commute_below))
 		{
+			int		i;
 			/* Yup, so we must update the data structures */
 			sjinfo->commute_below = commute_below;
-			foreach(l, root->join_info_list)
+			i = -1;
+			while ((i = bms_next_member(commute_below_l, i)) >= 0)
+			{
+				SpecialJoinInfo *otherinfo = root->join_info_array[i];
+
+				otherinfo->commute_above_l =
+					bms_add_member(otherinfo->commute_above_l, ojrelid);
+			}
+			i = -1;
+			while ((i = bms_next_member(commute_below_r, i)) >= 0)
 			{
-				SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l);
-
-				if (bms_is_member(otherinfo->ojrelid, commute_below_l))
-					otherinfo->commute_above_l =
-						bms_add_member(otherinfo->commute_above_l, ojrelid);
-				else if (bms_is_member(otherinfo->ojrelid, commute_below_r))
-					otherinfo->commute_above_r =
-						bms_add_member(otherinfo->commute_above_r, ojrelid);
+				SpecialJoinInfo *otherinfo = root->join_info_array[i];
+
+				otherinfo->commute_above_r =
+					bms_add_member(otherinfo->commute_above_r, ojrelid);
 			}
 		}
 	}
@@ -2550,7 +2561,7 @@ static bool
 check_redundant_nullability_qual(PlannerInfo *root, Node *clause)
 {
 	Var		   *forced_null_var;
-	ListCell   *lc;
+	int			i;
 
 	/* Check for IS NULL, and identify the Var forced to NULL */
 	forced_null_var = find_forced_null_var(clause);
@@ -2560,23 +2571,27 @@ check_redundant_nullability_qual(PlannerInfo *root, Node *clause)
 	/*
 	 * If the Var comes from the nullable side of a lower antijoin, the IS
 	 * NULL condition is necessarily true.  If it's not nulled by anything,
-	 * there is no point in searching the join_info_list.  Otherwise, we need
-	 * to find out whether the nulling rel is an antijoin.
+	 * there is no point in searching the SpecialJoinInfos.  Otherwise, we
+	 * need to find out whether the nulling rel is an antijoin.
 	 */
 	if (forced_null_var->varnullingrels == NULL)
 		return false;
 
-	foreach(lc, root->join_info_list)
+	/*
+	 * An antijoin that was converted from a semijoin will have zero
+	 * sjinfo->ojrelid; but in such a case the Var couldn't have come from
+	 * its nullable side.  So we can just ignore such antijoins and search
+	 * the join_info_array directly with indexes from the Var's nulling
+	 * bitmap.
+	 */
+	i = -1;
+	while ((i = bms_next_member(forced_null_var->varnullingrels, i)) >= 0)
 	{
-		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+		SpecialJoinInfo *sjinfo = root->join_info_array[i];
 
-		/*
-		 * This test will not succeed if sjinfo->ojrelid is zero, which is
-		 * possible for an antijoin that was converted from a semijoin; but in
-		 * such a case the Var couldn't have come from its nullable side.
-		 */
-		if (sjinfo->jointype == JOIN_ANTI && sjinfo->ojrelid != 0 &&
-			bms_is_member(sjinfo->ojrelid, forced_null_var->varnullingrels))
+		Assert(sjinfo->ojrelid != 0);
+
+		if (sjinfo->jointype == JOIN_ANTI)
 			return true;
 	}
 
@@ -2889,19 +2904,22 @@ static Relids
 get_join_domain_min_rels(PlannerInfo *root, Relids domain_relids)
 {
 	Relids		result = bms_copy(domain_relids);
-	ListCell   *lc;
+	Relids		domain_ojrelids;
+	int			i;
 
 	/* Top-level join domain? */
 	if (bms_equal(result, root->all_query_rels))
 		return result;
 
+	domain_ojrelids = bms_intersect(result, root->outer_join_rels);
+
 	/* Nope, look for lower outer joins that could potentially commute out */
-	foreach(lc, root->join_info_list)
+	i = -1;
+	while ((i = bms_next_member(domain_ojrelids, i)) >= 0)
 	{
-		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+		SpecialJoinInfo *sjinfo = root->join_info_array[i];
 
-		if (sjinfo->jointype == JOIN_LEFT &&
-			bms_is_member(sjinfo->ojrelid, result))
+		if (sjinfo->jointype == JOIN_LEFT)
 		{
 			result = bms_del_member(result, sjinfo->ojrelid);
 			result = bms_del_members(result, sjinfo->syn_righthand);
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 68fd033595..97ea14667b 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -117,6 +117,13 @@ setup_simple_rel_arrays(PlannerInfo *root)
 		root->simple_rte_array[rti++] = rte;
 	}
 
+	/*
+	 * join_info_array is initialized to all NULLs and will be filled by
+	 * later calls to deconstruct_distribute().
+	 */
+	root->join_info_array = (SpecialJoinInfo **)
+		palloc0(size * sizeof(SpecialJoinInfo *));
+
 	/* append_rel_array is not needed if there are no AppendRelInfos */
 	if (root->append_rel_list == NIL)
 	{
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index c44bd2f815..4961cf3cbf 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -544,25 +544,26 @@ clause_is_computable_at(PlannerInfo *root,
 						Relids clause_relids,
 						Relids eval_relids)
 {
-	ListCell   *lc;
+	Relids		relevant_relids;
+	int			i;
 
 	/* Nothing to do if no outer joins have been performed yet. */
 	if (!bms_overlap(eval_relids, root->outer_join_rels))
 		return true;
 
-	foreach(lc, root->join_info_list)
-	{
-		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
-
-		/* Ignore outer joins that are not yet performed. */
-		if (!bms_is_member(sjinfo->ojrelid, eval_relids))
-			continue;
+	/*
+	 * Compute relevant_relids: the outer joins that have already been
+	 * performed within eval_relids but are not listed in clause_relids.
+	 */
+	relevant_relids = bms_intersect(eval_relids, root->outer_join_rels);
+	relevant_relids = bms_difference(relevant_relids, clause_relids);
 
-		/* OK if clause lists it (we assume all Vars in it agree). */
-		if (bms_is_member(sjinfo->ojrelid, clause_relids))
-			continue;
+	i = -1;
+	while ((i = bms_next_member(relevant_relids, i)) >= 0)
+	{
+		SpecialJoinInfo *sjinfo = root->join_info_array[i];
 
-		/* Else, trouble if clause mentions any nullable Vars. */
+		/* Trouble if clause mentions any nullable Vars. */
 		if (bms_overlap(clause_relids, sjinfo->min_righthand) ||
 			(sjinfo->jointype == JOIN_FULL &&
 			 bms_overlap(clause_relids, sjinfo->min_lefthand)))
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 7d4f24d250..06b0ec18f5 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -245,6 +245,15 @@ struct PlannerInfo
 	 */
 	struct AppendRelInfo **append_rel_array pg_node_attr(read_write_ignore);
 
+	/*
+	 * join_info_array is the same length as the above arrays, and holds a
+	 * pointer to the SpecialJoinInfo for each outer join, indexed by ojrelid;
+	 * entries for other RTEs (and for removed outer joins) are NULL.
+	 * SpecialJoinInfos with zero ojrelid are not stored in this array.  (Not
+	 * printed because it'd be redundant with join_info_list.)
+	 */
+	struct SpecialJoinInfo **join_info_array pg_node_attr(read_write_ignore);
+
 	/*
 	 * all_baserels is a Relids set of all base relids (but not joins or
 	 * "other" rels) in the query.  This is computed in deconstruct_jointree.
-- 
2.31.0

