From c91e9bd4be0f4e00b9c63881bd4e2c8d7d0a704f Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Tue, 25 Nov 2025 13:00:20 +0900
Subject: [PATCH v2] Simplify COALESCE expressions using proven non-null
 arguments

The COALESCE function returns the first of its arguments that is not
null.  When an argument is proven non-null and is preceded only by
null constants (which are discarded anyway), the entire COALESCE
expression can be replaced by that argument.  If it appears later in
the argument list, all arguments following it can be dropped, since
they will never be reached.

Currently, we apply this simplification only to Const arguments.  We
can extend this logic to Var arguments, since we now have the NOT NULL
attribute information available during constant folding.  We can also
extend it to CoalesceExpr arguments based on the knowledge that a
CoalesceExpr cannot be NULL if at least one of its arguments can be
proven non-nullable.

This can help avoid the overhead of evaluating unreachable arguments.
It can also lead to better plans when the first argument is a NOT NULL
column and thus replaces the expression, as the planner no longer has
to treat the expression as non-strict, and can also leverage index
scans on that column.

Additionally, if we've determined that a CoalesceExpr is non-nullable,
we can reduce a NullTest qual applied to it to a constant during
constant folding.  It might be argued that it is uncommon to write
"COALESCE(...) IS [NOT] NULL" by hand, but this pattern is likely to
show up after view expansion, function inlining, or ORM query
generation.  Also, since the check is cheap, I believe the benefit
justifies the cost.
---
 src/backend/optimizer/plan/initsplan.c        | 20 +----
 src/backend/optimizer/util/clauses.c          | 89 ++++++++++++++-----
 src/include/optimizer/optimizer.h             |  2 +-
 src/test/regress/expected/aggregates.out      | 10 +--
 .../regress/expected/generated_virtual.out    | 48 +++++-----
 src/test/regress/expected/predicate.out       | 39 ++++++++
 src/test/regress/sql/generated_virtual.sql    | 11 +--
 src/test/regress/sql/predicate.sql            | 20 +++++
 8 files changed, 162 insertions(+), 77 deletions(-)

diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 65d473d95b6..671c5cde8fc 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -3413,22 +3413,6 @@ add_base_clause_to_rel(PlannerInfo *root, Index relid,
 										 restrictinfo->security_level);
 }
 
-/*
- * expr_is_nonnullable
- *	  Check to see if the Expr cannot be NULL
- *
- * Currently we only support simple Vars.
- */
-static bool
-expr_is_nonnullable(PlannerInfo *root, Expr *expr)
-{
-	/* For now only check simple Vars */
-	if (!IsA(expr, Var))
-		return false;
-
-	return var_is_nonnullable(root, (Var *) expr, true);
-}
-
 /*
  * restriction_is_always_true
  *	  Check to see if the RestrictInfo is always true.
@@ -3465,7 +3449,7 @@ restriction_is_always_true(PlannerInfo *root,
 		if (nulltest->argisrow)
 			return false;
 
-		return expr_is_nonnullable(root, nulltest->arg);
+		return expr_is_nonnullable(root, nulltest->arg, true);
 	}
 
 	/* If it's an OR, check its sub-clauses */
@@ -3530,7 +3514,7 @@ restriction_is_always_false(PlannerInfo *root,
 		if (nulltest->argisrow)
 			return false;
 
-		return expr_is_nonnullable(root, nulltest->arg);
+		return expr_is_nonnullable(root, nulltest->arg, true);
 	}
 
 	/* If it's an OR, check its sub-clauses */
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 202ba8ed4bb..9e6b5708efd 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -131,6 +131,7 @@ static Expr *simplify_function(Oid funcid,
 							   Oid result_collid, Oid input_collid, List **args_p,
 							   bool funcvariadic, bool process_args, bool allow_non_const,
 							   eval_const_expressions_context *context);
+static bool var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info);
 static List *reorder_function_arguments(List *args, int pronargs,
 										HeapTuple func_tuple);
 static List *add_function_defaults(List *args, int pronargs,
@@ -3318,10 +3319,10 @@ eval_const_expressions_mutator(Node *node,
 													   context);
 
 					/*
-					 * We can remove null constants from the list. For a
-					 * non-null constant, if it has not been preceded by any
-					 * other non-null-constant expressions then it is the
-					 * result. Otherwise, it's the next argument, but we can
+					 * We can remove null constants from the list.  For a
+					 * nonnullable expression, if it has not been preceded by
+					 * any non-null-constant expressions then it is the
+					 * result.  Otherwise, it's the next argument, but we can
 					 * drop following arguments since they will never be
 					 * reached.
 					 */
@@ -3334,6 +3335,14 @@ eval_const_expressions_mutator(Node *node,
 						newargs = lappend(newargs, e);
 						break;
 					}
+					if (expr_is_nonnullable(context->root, (Expr *) e, false))
+					{
+						if (newargs == NIL)
+							return e;	/* first expr */
+						newargs = lappend(newargs, e);
+						break;
+					}
+
 					newargs = lappend(newargs, e);
 				}
 
@@ -3557,30 +3566,27 @@ eval_const_expressions_mutator(Node *node,
 
 					return makeBoolConst(result, false);
 				}
-				if (!ntest->argisrow && arg && IsA(arg, Var) && context->root)
+				if (!ntest->argisrow && arg &&
+					expr_is_nonnullable(context->root, (Expr *) arg, false))
 				{
-					Var		   *varg = (Var *) arg;
 					bool		result;
 
-					if (var_is_nonnullable(context->root, varg, false))
+					switch (ntest->nulltesttype)
 					{
-						switch (ntest->nulltesttype)
-						{
-							case IS_NULL:
-								result = false;
-								break;
-							case IS_NOT_NULL:
-								result = true;
-								break;
-							default:
-								elog(ERROR, "unrecognized nulltesttype: %d",
-									 (int) ntest->nulltesttype);
-								result = false; /* keep compiler quiet */
-								break;
-						}
-
-						return makeBoolConst(result, false);
+						case IS_NULL:
+							result = false;
+							break;
+						case IS_NOT_NULL:
+							result = true;
+							break;
+						default:
+							elog(ERROR, "unrecognized nulltesttype: %d",
+								 (int) ntest->nulltesttype);
+							result = false; /* keep compiler quiet */
+							break;
 					}
+
+					return makeBoolConst(result, false);
 				}
 
 				newntest = makeNode(NullTest);
@@ -4209,7 +4215,7 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod,
  * use_rel_info indicates whether the corresponding RelOptInfo is available for
  * use.
  */
-bool
+static bool
 var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info)
 {
 	Bitmapset  *notnullattnums = NULL;
@@ -4261,6 +4267,41 @@ var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info)
 	return false;
 }
 
+/*
+ * expr_is_nonnullable
+ *	  Check to see if the Expr cannot be NULL
+ *
+ * Currently, we only support Vars (only when root is non-NULL), Consts, and
+ * CoalesceExprs; every other node type yields false.  A false result means
+ * "not proven", not "is NULL".  More node types may be added in the future.
+ * use_rel_info is interpreted the same way as in var_is_nonnullable().
+ */
+bool
+expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info)
+{
+	if (IsA(expr, Var) && root)
+		return var_is_nonnullable(root, (Var *) expr, use_rel_info);
+	if (IsA(expr, Const))
+		return !((Const *) expr)->constisnull;
+	if (IsA(expr, CoalesceExpr))
+	{
+		/*
+		 * A CoalesceExpr returns NULL if and only if all its arguments are
+		 * NULL.  Therefore, we can determine that a CoalesceExpr cannot be
+		 * NULL if at least one of its arguments can be proven non-nullable.
+		 */
+		CoalesceExpr *coalesceexpr = (CoalesceExpr *) expr;
+
+		foreach_ptr(Expr, arg, coalesceexpr->args)
+		{
+			if (expr_is_nonnullable(root, arg, use_rel_info))
+				return true;
+		}
+	}
+
+	return false;
+}
+
 /*
  * expand_function_arguments: convert named-notation args to positional args
  * and/or insert default args, as needed
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index d0aa8ab0c1c..ab3badcbda4 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -145,7 +145,7 @@ extern Node *estimate_expression_value(PlannerInfo *root, Node *node);
 extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
 						   Oid result_collation);
 
-extern bool var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info);
+extern bool expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info);
 
 extern List *expand_function_arguments(List *args, bool include_out_arguments,
 									   Oid result_type,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index bc83a6e188e..ad9c168822b 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1222,16 +1222,14 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 -- interesting corner case: constant gets optimized into a seqscan
 explain (costs off)
   select max(100) from tenk1;
-                     QUERY PLAN                     
-----------------------------------------------------
+           QUERY PLAN            
+---------------------------------
  Result
    Replaces: MinMaxAggregate
    InitPlan minmax_1
      ->  Limit
-           ->  Result
-                 One-Time Filter: (100 IS NOT NULL)
-                 ->  Seq Scan on tenk1
-(7 rows)
+           ->  Seq Scan on tenk1
+(5 rows)
 
 select max(100) from tenk1;
  max 
diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out
index dde325e46c6..249e68be654 100644
--- a/src/test/regress/expected/generated_virtual.out
+++ b/src/test/regress/expected/generated_virtual.out
@@ -1509,10 +1509,11 @@ create table gtest32 (
   a int primary key,
   b int generated always as (a * 2),
   c int generated always as (10 + 10),
-  d int generated always as (coalesce(a, 100)),
-  e int
+  d int generated always as (coalesce(f, 100)),
+  e int,
+  f int
 );
-insert into gtest32 values (1), (2);
+insert into gtest32 (a, f) values (1, 1), (2, 2);
 analyze gtest32;
 -- Ensure that nullingrel bits are propagated into the generation expressions
 explain (costs off)
@@ -1591,46 +1592,47 @@ where coalesce(t2.b, 1) = 2 or t1.a is null;
 -- Ensure that the generation expressions are wrapped into PHVs if needed
 explain (verbose, costs off)
 select t2.* from gtest32 t1 left join gtest32 t2 on false;
-                          QUERY PLAN                           
----------------------------------------------------------------
+                             QUERY PLAN                              
+---------------------------------------------------------------------
  Nested Loop Left Join
-   Output: t2.a, (t2.a * 2), (20), (COALESCE(t2.a, 100)), t2.e
+   Output: t2.a, (t2.a * 2), (20), (COALESCE(t2.f, 100)), t2.e, t2.f
    Join Filter: false
    ->  Seq Scan on generated_virtual_tests.gtest32 t1
-         Output: t1.a, t1.b, t1.c, t1.d, t1.e
+         Output: t1.a, t1.b, t1.c, t1.d, t1.e, t1.f
    ->  Result
-         Output: t2.a, t2.e, 20, COALESCE(t2.a, 100)
+         Output: t2.a, t2.e, t2.f, 20, COALESCE(t2.f, 100)
          Replaces: Scan on t2
          One-Time Filter: false
 (9 rows)
 
 select t2.* from gtest32 t1 left join gtest32 t2 on false;
- a | b | c | d | e 
----+---+---+---+---
-   |   |   |   |  
-   |   |   |   |  
+ a | b | c | d | e | f 
+---+---+---+---+---+---
+   |   |   |   |   |  
+   |   |   |   |   |  
 (2 rows)
 
 explain (verbose, costs off)
-select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
-                     QUERY PLAN                      
------------------------------------------------------
+select * from gtest32 t group by grouping sets (a, b, c, d, e, f) having c = 20;
+                       QUERY PLAN                       
+--------------------------------------------------------
  HashAggregate
-   Output: a, ((a * 2)), (20), (COALESCE(a, 100)), e
+   Output: a, ((a * 2)), (20), (COALESCE(f, 100)), e, f
    Hash Key: t.a
    Hash Key: (t.a * 2)
    Hash Key: 20
-   Hash Key: COALESCE(t.a, 100)
+   Hash Key: COALESCE(t.f, 100)
    Hash Key: t.e
+   Hash Key: t.f
    Filter: ((20) = 20)
    ->  Seq Scan on generated_virtual_tests.gtest32 t
-         Output: a, (a * 2), 20, COALESCE(a, 100), e
-(10 rows)
+         Output: a, (a * 2), 20, COALESCE(f, 100), e, f
+(11 rows)
 
-select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
- a | b | c  | d | e 
----+---+----+---+---
-   |   | 20 |   |  
+select * from gtest32 t group by grouping sets (a, b, c, d, e, f) having c = 20;
+ a | b | c  | d | e | f 
+---+---+----+---+---+---
+   |   | 20 |   |   |  
 (1 row)
 
 -- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded
diff --git a/src/test/regress/expected/predicate.out b/src/test/regress/expected/predicate.out
index 66fb0854b88..fc12c0cd106 100644
--- a/src/test/regress/expected/predicate.out
+++ b/src/test/regress/expected/predicate.out
@@ -284,6 +284,45 @@ SELECT * FROM pred_tab t1
          ->  Seq Scan on pred_tab t2
 (9 rows)
 
+--
+-- Tests for NullTest reduction for COALESCE expressions
+--
+-- Ensure the IS_NOT_NULL qual is ignored
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, 1) IS NOT NULL;
+      QUERY PLAN      
+----------------------
+ Seq Scan on pred_tab
+(1 row)
+
+-- Ensure the IS_NOT_NULL qual is ignored
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, a) IS NOT NULL;
+      QUERY PLAN      
+----------------------
+ Seq Scan on pred_tab
+(1 row)
+
+-- Ensure the IS_NULL qual is reduced to constant-FALSE
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, 1) IS NULL;
+          QUERY PLAN          
+------------------------------
+ Result
+   Replaces: Scan on pred_tab
+   One-Time Filter: false
+(3 rows)
+
+-- Ensure the IS_NULL qual is reduced to constant-FALSE
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, a) IS NULL;
+          QUERY PLAN          
+------------------------------
+ Result
+   Replaces: Scan on pred_tab
+   One-Time Filter: false
+(3 rows)
+
 DROP TABLE pred_tab;
 -- Validate we handle IS NULL and IS NOT NULL quals correctly with inheritance
 -- parents.
diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql
index 2911439776c..81152b39a79 100644
--- a/src/test/regress/sql/generated_virtual.sql
+++ b/src/test/regress/sql/generated_virtual.sql
@@ -817,11 +817,12 @@ create table gtest32 (
   a int primary key,
   b int generated always as (a * 2),
   c int generated always as (10 + 10),
-  d int generated always as (coalesce(a, 100)),
-  e int
+  d int generated always as (coalesce(f, 100)),
+  e int,
+  f int
 );
 
-insert into gtest32 values (1), (2);
+insert into gtest32 (a, f) values (1, 1), (2, 2);
 analyze gtest32;
 
 -- Ensure that nullingrel bits are propagated into the generation expressions
@@ -859,8 +860,8 @@ select t2.* from gtest32 t1 left join gtest32 t2 on false;
 select t2.* from gtest32 t1 left join gtest32 t2 on false;
 
 explain (verbose, costs off)
-select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
-select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
+select * from gtest32 t group by grouping sets (a, b, c, d, e, f) having c = 20;
+select * from gtest32 t group by grouping sets (a, b, c, d, e, f) having c = 20;
 
 -- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded
 alter table gtest32 alter column e type bigint using b;
diff --git a/src/test/regress/sql/predicate.sql b/src/test/regress/sql/predicate.sql
index 32302d60b6d..1fc83e762fc 100644
--- a/src/test/regress/sql/predicate.sql
+++ b/src/test/regress/sql/predicate.sql
@@ -133,6 +133,26 @@ SELECT * FROM pred_tab t1
         (SELECT 1 FROM pred_tab t3, pred_tab t4, pred_tab t5, pred_tab t6
          WHERE t1.a = t3.a AND t6.a IS NULL);
 
+--
+-- Tests for NullTest reduction for COALESCE expressions
+--
+
+-- Ensure the IS_NOT_NULL qual is ignored
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, 1) IS NOT NULL;
+
+-- Ensure the IS_NOT_NULL qual is ignored
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, a) IS NOT NULL;
+
+-- Ensure the IS_NULL qual is reduced to constant-FALSE
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, 1) IS NULL;
+
+-- Ensure the IS_NULL qual is reduced to constant-FALSE
+EXPLAIN (COSTS OFF)
+SELECT * FROM pred_tab WHERE COALESCE(b, a) IS NULL;
+
 DROP TABLE pred_tab;
 
 -- Validate we handle IS NULL and IS NOT NULL quals correctly with inheritance
-- 
2.39.5 (Apple Git-154)

