From 4cc4c985141c302a1579514472eb38942637464d Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Mon, 2 Sep 2024 17:01:20 +0900
Subject: [PATCH v1] Remove no-op PlaceHolderVars

We may insert PlaceHolderVars when pulling up a subquery that is
within the nullable side of an outer join.  However, if the outer join
is later reduced to an inner join, the PHVs would become no-ops with
no phnullingrels bits.

It's always desirable to remove these no-op PlaceHolderVars because
they can constrain optimization opportunities, such as blocking
subexpression folding, or forcing join order.  There is one case where
we need to keep a PHV even if its phnullingrels becomes empty: the PHV
is used to enforce separate identity of subexpressions.

This patch removes no-op PlaceHolderVars by marking PHVs that need to
be kept because they are serving to isolate subexpressions.
---
 src/backend/optimizer/prep/prepjointree.c | 17 +++++++++++++++++
 src/backend/optimizer/util/placeholder.c  |  7 ++++---
 src/backend/optimizer/util/restrictinfo.c |  9 +++++++--
 src/backend/rewrite/rewriteManip.c        | 15 ++++++++++++---
 src/include/nodes/pathnodes.h             |  7 +++++++
 5 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 34fbf8ee23..25a4e826c5 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -2442,6 +2442,14 @@ pullup_replace_vars_callback(Var *var,
 				make_placeholder_expr(rcon->root,
 									  (Expr *) newnode,
 									  bms_make_singleton(rcon->varno));
+
+			/*
+			 * If the PHV is used to isolate subexpressions, mark it as
+			 * needing to be kept.
+			 */
+			if (rcon->wrap_non_vars)
+				((PlaceHolderVar *) newnode)->needkeep = true;
+
 			/* cache it with the PHV, and with phlevelsup etc not set yet */
 			rcon->rv_cache[InvalidAttrNumber] = copyObject(newnode);
 		}
@@ -2462,6 +2470,7 @@ pullup_replace_vars_callback(Var *var,
 		if (need_phv)
 		{
 			bool		wrap;
+			bool		isolate_exprs = false;
 
 			if (newnode && IsA(newnode, Var) &&
 				((Var *) newnode)->varlevelsup == 0)
@@ -2494,6 +2503,7 @@ pullup_replace_vars_callback(Var *var,
 			{
 				/* Caller told us to wrap all non-Vars in a PlaceHolderVar */
 				wrap = true;
+				isolate_exprs = true;
 			}
 			else
 			{
@@ -2541,6 +2551,13 @@ pullup_replace_vars_callback(Var *var,
 										  (Expr *) newnode,
 										  bms_make_singleton(rcon->varno));
 
+				/*
+				 * If the PHV is used to isolate subexpressions, mark it as
+				 * needing to be kept.
+				 */
+				if (isolate_exprs)
+					((PlaceHolderVar *) newnode)->needkeep = true;
+
 				/*
 				 * Cache it if possible (ie, if the attno is in range, which
 				 * it probably always should be).
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 81abadd6db..5c678db22d 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -44,9 +44,9 @@ static bool contain_placeholder_references_walker(Node *node,
  * phrels is the syntactic location (as a set of relids) to attribute
  * to the expression.
  *
- * The caller is responsible for adjusting phlevelsup and phnullingrels
- * as needed.  Because we do not know here which query level the PHV
- * will be associated with, it's important that this function touches
+ * The caller is responsible for adjusting phlevelsup, phnullingrels and
+ * needkeep as needed.  Because we do not know here which query level the
+ * PHV will be associated with, it's important that this function touches
  * only root->glob; messing with other parts of PlannerInfo would be
  * likely to do the wrong thing.
  */
@@ -60,6 +60,7 @@ make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels)
 	phv->phnullingrels = NULL;	/* caller may change this later */
 	phv->phid = ++(root->glob->lastPHId);
 	phv->phlevelsup = 0;		/* caller may change this later */
+	phv->needkeep = false;		/* caller may change this later */
 
 	return phv;
 }
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index 0b406e9334..8ec579e86e 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -87,8 +87,13 @@ make_restrictinfo(PlannerInfo *root,
 													   incompatible_relids,
 													   outer_relids);
 
-	/* Shouldn't be an AND clause, else AND/OR flattening messed up */
-	Assert(!is_andclause(clause));
+	/*
+	 * XXX Normally it shouldn't be an AND clause, else AND/OR flattening
+	 * messed up.  An exception occurs if the clause was initially wrapped in
+	 * a PlaceHolderVar and the PlaceHolderVar is removed afterward.  In this
+	 * case the clause may not have been processed for AND/OR flattening by
+	 * preprocess_expression.
+	 */
 
 	return make_restrictinfo_internal(root,
 									  clause,
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index b20625fbd2..58b3e67a49 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -1281,9 +1281,10 @@ remove_nulling_relids_mutator(Node *node,
 		{
 			/*
 			 * Note: it might seem desirable to remove the PHV altogether if
-			 * phnullingrels goes to empty.  Currently we dare not do that
-			 * because we use PHVs in some cases to enforce separate identity
-			 * of subexpressions; see wrap_non_vars usages in prepjointree.c.
+			 * phnullingrels goes to empty.  Currently we only dare to do that
+			 * if the PHV is not marked as needing to be kept, because we use
+			 * PHVs in some cases to enforce separate identity of
+			 * subexpressions; see wrap_non_vars usages in prepjointree.c.
 			 */
 			/* Copy the PlaceHolderVar and mutate what's below ... */
 			phv = (PlaceHolderVar *)
@@ -1297,6 +1298,14 @@ remove_nulling_relids_mutator(Node *node,
 			phv->phrels = bms_difference(phv->phrels,
 										 context->removable_relids);
 			Assert(!bms_is_empty(phv->phrels));
+
+			/*
+			 * Remove the PHV altogether if it is not marked as needing to be
+			 * kept and phnullingrels goes to empty.
+			 */
+			if (!phv->needkeep && bms_is_empty(phv->phnullingrels))
+				return (Node *) phv->phexpr;
+
 			return (Node *) phv;
 		}
 		/* Otherwise fall through to copy the PlaceHolderVar normally */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 540d021592..ab9758587a 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2758,6 +2758,10 @@ typedef struct MergeScanSelCache
  * level of a PlaceHolderVar might be a join rather than a base relation.
  * Likewise, phnullingrels corresponds to varnullingrels.
  *
+ * needkeep indicates whether the PHV needs to be kept when its phnullingrels
+ * becomes empty.  This is set true in cases where the PHV is used to isolate
+ * subexpressions; see wrap_non_vars usages in prepjointree.c.
+ *
  * Although the planner treats this as an expression node type, it is not
  * recognized by the parser or executor, so we declare it here rather than
  * in primnodes.h.
@@ -2796,6 +2800,9 @@ typedef struct PlaceHolderVar
 
 	/* > 0 if PHV belongs to outer query */
 	Index		phlevelsup;
+
+	/* true if PHV needs to be kept */
+	bool		needkeep;
 } PlaceHolderVar;
 
 /*
-- 
2.43.0

