Hi everyone,

In commit c95cd299, we added an early-exit in `scalararraysel()` to return selectivity 0.0 when a NOT IN / <> ALL list contains a NULL and the operator is strict. The commit message noted a possible follow-up:

    In the future, it might be better to do something for this case in
    constant folding.  We would need to be careful to only do this for
    strict operators on expressions located in places that don't care about
    distinguishing false from NULL returns. i.e. EXPRKIND_QUAL expressions.
    Doing that requires a bit more thought and effort, so here we just fix
    some needlessly slow selectivity estimations for ScalarArrayOpExpr
    containing many array elements and at least one NULL.

This patch implements that follow-up.

When a <> ALL / NOT IN expression appears in a qual context and its array contains a NULL element, the expression can never evaluate to true; it can only return false or NULL. In a qual, both mean the row is excluded. We can therefore fold the entire SAOP to constant false during `eval_const_expressions()`, which the planner can then use to eliminate the scan entirely.

A new `is_qual` flag is added to `eval_const_expressions_context`. A new function `eval_const_expressions_qual()` sets this flag and is called from sites that process WHERE/qual expressions. To prevent the flag from leaking into non-qual contexts (e.g. `func(x NOT IN (NULL, 1))`), is_qual is saved into a local variable and immediately reset to false at the start of `eval_const_expressions_mutator`. Only the SAOP case reads `this_node_is_qual` - after processing its arguments with `is_qual = false`.

Any suggestions?

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC,
https://tantorlabs.com/

From 523648f71f5c7c2a27002f68910a5978988a6db6 Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <[email protected]>
Date: Tue, 16 Jun 2026 22:55:53 +0300
Subject: [PATCH v1] Fold NOT IN / <> ALL with NULL array element to false in
 qual context

When a ScalarArrayOpExpr with useOr=false (NOT IN or <> ALL) appears in
a qual context and its array contains a NULL element, the expression can
never evaluate to true: with a strict operator, comparing any value to
NULL yields NULL, so the overall result is either false or NULL.  In a
qual, both mean the row is excluded, so the expression can be safely
folded to constant false during eval_const_expressions().  This allows
the planner to eliminate the scan entirely rather than performing it and
discarding all rows.

To inform eval_const_expressions() that an expression is used as a
qual, a new entry point eval_const_expressions_qual() is introduced.
It sets a new is_qual flag in eval_const_expressions_context.  The flag
is saved into a local variable and immediately reset to false at the
start of eval_const_expressions_mutator(), so it cannot leak into
sub-expressions where false and NULL are not interchangeable (e.g., an
argument to a non-strict function).  The folding checks
func_strict(saop->opfuncid) explicitly to confirm the operator is
strict before applying the optimization.
---
 src/backend/commands/copy.c               |  2 +-
 src/backend/optimizer/plan/planner.c      |  7 ++-
 src/backend/optimizer/plan/subselect.c    |  2 +-
 src/backend/optimizer/util/clauses.c      | 74 +++++++++++++++++++++++
 src/backend/optimizer/util/inherit.c      |  2 +-
 src/include/optimizer/optimizer.h         |  1 +
 src/test/regress/expected/planner_est.out | 24 ++++----
 7 files changed, 96 insertions(+), 16 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 003b70852bb..68cb535f73f 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -205,7 +205,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 			}
 
 			/* Reduce WHERE clause to standard list-of-AND-terms form */
-			whereClause = eval_const_expressions(NULL, whereClause);
+			whereClause = eval_const_expressions_qual(NULL, whereClause);
 
 			whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
 			whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index f4689e7c9f8..40a9fa62416 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1450,7 +1450,12 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 	 * with AND directly under AND, nor OR directly under OR.
 	 */
 	if (kind != EXPRKIND_RTFUNC)
-		expr = eval_const_expressions(root, expr);
+	{
+		if (kind == EXPRKIND_QUAL)
+			expr = eval_const_expressions_qual(root, expr);
+		else
+			expr = eval_const_expressions(root, expr);
+	}
 
 	/*
 	 * If it's a qual or havingQual, canonicalize it.
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 6aa8971c95d..3ab974b821e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1979,7 +1979,7 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
 	subroot.type = T_PlannerInfo;
 	subroot.glob = root->glob;
 	subroot.parse = subselect;
-	whereClause = eval_const_expressions(&subroot, whereClause);
+	whereClause = eval_const_expressions_qual(&subroot, whereClause);
 	whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
 	whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
 
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 07738894d1a..6c3a146e2fc 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -71,6 +71,7 @@ typedef struct
 	List	   *active_fns;
 	Node	   *case_val;
 	bool		estimate;
+	bool		is_qual;		/* true if simplifying a qual expression */
 } eval_const_expressions_context;
 
 typedef struct
@@ -2509,6 +2510,34 @@ eval_const_expressions(PlannerInfo *root, Node *node)
 	context.active_fns = NIL;	/* nothing being recursively simplified */
 	context.case_val = NULL;	/* no CASE being examined */
 	context.estimate = false;	/* safe transformations only */
+	context.is_qual = false;	/* not a qual expression */
+	return eval_const_expressions_mutator(node, &context);
+}
+
+/*--------------------
+ * eval_const_expressions_qual
+ *
+ * Same as eval_const_expressions, but informs the simplifier that the
+ * expression is used as a qual (i.e., in a context where NULL and false have
+ * the same effect).  This enables additional simplifications, such as folding
+ * a NOT IN / <> ALL expression to constant false when the array contains a
+ * NULL element and the operator is strict.
+ *--------------------
+ */
+Node *
+eval_const_expressions_qual(PlannerInfo *root, Node *node)
+{
+	eval_const_expressions_context context;
+
+	if (root)
+		context.boundParams = root->glob->boundParams;	/* bound Params */
+	else
+		context.boundParams = NULL;
+	context.root = root;		/* for inlined-function dependencies */
+	context.active_fns = NIL;	/* nothing being recursively simplified */
+	context.case_val = NULL;	/* no CASE being examined */
+	context.estimate = false;	/* safe transformations only */
+	context.is_qual = true;		/* expression is used as a qual */
 	return eval_const_expressions_mutator(node, &context);
 }
 
@@ -2651,6 +2680,7 @@ estimate_expression_value(PlannerInfo *root, Node *node)
 	context.active_fns = NIL;	/* nothing being recursively simplified */
 	context.case_val = NULL;	/* no CASE being examined */
 	context.estimate = true;	/* unsafe transformations OK */
+	context.is_qual = false;	/* not a qual expression */
 	return eval_const_expressions_mutator(node, &context);
 }
 
@@ -2689,6 +2719,13 @@ static Node *
 eval_const_expressions_mutator(Node *node,
 							   eval_const_expressions_context *context)
 {
+	/*
+	 * Save and reset is_qual so that recursive calls don't inherit it by
+	 * default.
+	 */
+	bool		this_node_is_qual = context->is_qual;
+
+	context->is_qual = false;
 
 	/* since this function recurses, it could be driven to stack overflow */
 	check_stack_depth();
@@ -3155,6 +3192,43 @@ eval_const_expressions_mutator(Node *node,
 				/* Make sure we know underlying function */
 				set_sa_opfuncid(saop);
 
+				/*
+				 * When simplifying a qual expression (!useOr means NOT IN or
+				 * <> ALL), check whether the array contains a NULL element.
+				 * If the operator is strict, a NULL in the array means the
+				 * expression can never be true.
+				 */
+				if (this_node_is_qual && !saop->useOr &&
+					func_strict(saop->opfuncid))
+				{
+					Node	   *arrayarg = lsecond(saop->args);
+
+					if (IsA(arrayarg, Const) &&
+						!((Const *) arrayarg)->constisnull)
+					{
+						/* Constant array: check for NULLs using bitmap */
+						ArrayType  *arrayval =
+							DatumGetArrayTypeP(((Const *) arrayarg)->constvalue);
+
+						if (array_contains_nulls(arrayval))
+							return makeBoolConst(false, false);
+					}
+					else if (IsA(arrayarg, ArrayExpr))
+					{
+						/* Non-const array: check each element */
+						ListCell   *lc2;
+
+						foreach(lc2, ((ArrayExpr *) arrayarg)->elements)
+						{
+							Node	   *elem = (Node *) lfirst(lc2);
+
+							if (IsA(elem, Const) &&
+								((Const *) elem)->constisnull)
+								return makeBoolConst(false, false);
+						}
+					}
+				}
+
 				/*
 				 * If all arguments are Consts, and it's a safe function, we
 				 * can fold to a constant
diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c
index 6a7b9edff3f..f627f1f554e 100644
--- a/src/backend/optimizer/util/inherit.c
+++ b/src/backend/optimizer/util/inherit.c
@@ -864,7 +864,7 @@ apply_child_basequals(PlannerInfo *root, RelOptInfo *parentrel,
 		childqual = adjust_appendrel_attrs(root,
 										   (Node *) rinfo->clause,
 										   1, &appinfo);
-		childqual = eval_const_expressions(root, childqual);
+		childqual = eval_const_expressions_qual(root, childqual);
 		/* check for flat-out constant */
 		if (childqual && IsA(childqual, Const))
 		{
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index cb6241e2bdd..5be86a38a2f 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -145,6 +145,7 @@ extern bool contain_volatile_functions_after_planning(Expr *expr);
 extern bool contain_volatile_functions_not_nextval(Node *clause);
 
 extern Node *eval_const_expressions(PlannerInfo *root, Node *node);
+extern Node *eval_const_expressions_qual(PlannerInfo *root, Node *node);
 
 extern void convert_saop_to_hashed_saop(Node *node);
 
diff --git a/src/test/regress/expected/planner_est.out b/src/test/regress/expected/planner_est.out
index b62a47552fa..453cabf62be 100644
--- a/src/test/regress/expected/planner_est.out
+++ b/src/test/regress/expected/planner_est.out
@@ -192,22 +192,22 @@ false, true, false, true);
 SELECT explain_mask_costs($$
 SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 99, NULL]);$$,
 false, true, false, true);
-                   explain_mask_costs                    
----------------------------------------------------------
- Seq Scan on tenk1  (cost=N..N rows=1 width=N)
-   Filter: (unique1 <> ALL ('{1,2,99,NULL}'::integer[]))
-(2 rows)
+         explain_mask_costs         
+------------------------------------
+ Result  (cost=N..N rows=0 width=N)
+   Replaces: Scan on tenk1
+   One-Time Filter: false
+(3 rows)
 
 -- Try a non-const array containing a NULL
 SELECT explain_mask_costs($$
 SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 98, (SELECT 99), NULL]);$$,
 false, true, false, true);
-                                 explain_mask_costs                                  
--------------------------------------------------------------------------------------
- Seq Scan on tenk1  (cost=N..N rows=1 width=N)
-   Filter: (unique1 <> ALL (ARRAY[1, 2, 98, (InitPlan expr_1).col1, NULL::integer]))
-   InitPlan expr_1
-     ->  Result  (cost=N..N rows=1 width=N)
-(4 rows)
+         explain_mask_costs         
+------------------------------------
+ Result  (cost=N..N rows=0 width=N)
+   Replaces: Scan on tenk1
+   One-Time Filter: false
+(3 rows)
 
 DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
-- 
2.34.1

Reply via email to