From d9f3a8b7e6b9cab30731e48b518bf6cc33dd041d Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Wed, 30 Apr 2025 00:03:07 +1200
Subject: [PATCH v3 1/2] Teach UNION planner to remove dummy inputs

This adjusts UNION planning so that the planner produces more optimal
plans when one or more of the UNION's subqueries has been proven to be
empty (a dummy rel).

If any of the inputs are empty, then that input can be removed from the
Append / MergeAppend.  Previously, a const-false "Result" node would
appear to represent this.  Removing empty inputs has a few extra
benefits when only 1 union child remains as it means the Append or
MergeAppend can be removed in setrefs.c and also we can provide better
n_distinct estimates by looking at the remaining input's statistics.

Author: David Rowley <dgrowleyml@gmail.com>
Discussion: https://postgr.es/m/CAApHDvri53PPF76c3M94_QNWbJfXjyCnjXuj_2=LYM-0m8WZtw@mail.gmail.com
---
 src/backend/optimizer/prep/prepunion.c | 56 +++++++++++++++++++++-----
 src/test/regress/expected/union.out    | 51 +++++++++++++++++++++++
 src/test/regress/sql/union.sql         | 27 +++++++++++++
 3 files changed, 125 insertions(+), 9 deletions(-)

diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 6bd0f4a5dc3..bf10988cd2d 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -523,6 +523,13 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
 		bool		is_sorted;
 		int			presorted_keys;
 
+		/* If the input rel is dummy, propagate that to this query level */
+		if (is_dummy_rel(final_rel))
+		{
+			mark_dummy_rel(rel);
+			continue;
+		}
+
 		/*
 		 * Include the cheapest path as-is so that the set operation can be
 		 * cheaply implemented using a method which does not require the input
@@ -763,6 +770,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		RelOptInfo *rel = lfirst(lc);
 		Path	   *ordered_path;
 
+		/* Skip any UNION children that are proven not to yield any rows */
+		if (is_dummy_rel(rel))
+			continue;
+
 		cheapest_pathlist = lappend(cheapest_pathlist,
 									rel->cheapest_total_path);
 
@@ -812,6 +823,15 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	result_rel->consider_parallel = consider_parallel;
 	result_rel->consider_startup = (root->tuple_fraction > 0);
 
+	/* If all UNION children were dummy rels, make the resulting rel dummy */
+	if (cheapest_pathlist == NIL)
+	{
+		result_rel->reltarget = create_pathtarget(root, list_nth(tlist_list, 0));
+		mark_dummy_rel(result_rel);
+
+		return result_rel;
+	}
+
 	/*
 	 * Append the child results together using the cheapest paths from each
 	 * union child.
@@ -876,15 +896,33 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		bool		can_sort = grouping_is_sortable(groupList);
 		bool		can_hash = grouping_is_hashable(groupList);
 
-		/*
-		 * XXX for the moment, take the number of distinct groups as equal to
-		 * the total input size, i.e., the worst case.  This is too
-		 * conservative, but it's not clear how to get a decent estimate of
-		 * the true size.  One should note as well the propensity of novices
-		 * to write UNION rather than UNION ALL even when they don't expect
-		 * any duplicates...
-		 */
-		dNumGroups = apath->rows;
+		if (list_length(cheapest_pathlist) == 1)
+		{
+			Path	   *path = linitial(cheapest_pathlist);
+
+			/*
+			 * In the case where only one union child remains due to the
+			 * detection of one or more dummy union children, we'll obtain an
+			 * estimate on the surviving child directly.
+			 */
+			dNumGroups = estimate_num_groups(root,
+											 path->pathtarget->exprs,
+											 path->rows,
+											 NULL,
+											 NULL);
+		}
+		else
+		{
+			/*
+			 * Otherwise, for the moment, take the number of distinct groups
+			 * as equal to the total input size, i.e., the worst case.  This
+			 * is too conservative, but it's not clear how to get a decent
+			 * estimate of the true size.  One should note as well the
+			 * propensity of novices to write UNION rather than UNION ALL even
+			 * when they don't expect any duplicates...
+			 */
+			dNumGroups = apath->rows;
+		}
 
 		if (can_hash)
 		{
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index d3ea433db15..7c089e0d598 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1216,6 +1216,57 @@ select event_id
 
 drop table events_child, events, other_events;
 reset enable_indexonlyscan;
+--
+-- Test handling of UNION with provably empty inputs
+--
+-- Ensure the empty UNION input is pruned and de-duplication is done for the
+-- remaining relation.
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1
+ORDER BY 1;
+              QUERY PLAN              
+--------------------------------------
+ Sort
+   Output: tenk1.four
+   Sort Key: tenk1.four
+   ->  HashAggregate
+         Output: tenk1.four
+         Group Key: tenk1.four
+         ->  Seq Scan on public.tenk1
+               Output: tenk1.four
+(8 rows)
+
+-- Validate that the results of the above are correct
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1
+ORDER BY 1;
+ two 
+-----
+   0
+   1
+   2
+   3
+(4 rows)
+
+-- All UNION inputs are proven empty.  Ensure the planner provides a
+-- const-false Result node
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1 WHERE 1=2
+UNION
+SELECT ten FROM tenk1 WHERE 1=2;
+           QUERY PLAN           
+--------------------------------
+ Result
+   Output: unnamed_subquery.two
+   Replaces: Aggregate
+   One-Time Filter: false
+(4 rows)
+
 -- Test constraint exclusion of UNION ALL subqueries
 explain (costs off)
  SELECT * FROM
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 13700a6bfc4..56bd20e741c 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -459,6 +459,33 @@ drop table events_child, events, other_events;
 
 reset enable_indexonlyscan;
 
+--
+-- Test handling of UNION with provably empty inputs
+--
+
+-- Ensure the empty UNION input is pruned and de-duplication is done for the
+-- remaining relation.
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1
+ORDER BY 1;
+
+-- Validate that the results of the above are correct
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1
+ORDER BY 1;
+
+-- All UNION inputs are proven empty.  Ensure the planner provides a
+-- const-false Result node
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1 WHERE 1=2
+UNION
+SELECT ten FROM tenk1 WHERE 1=2;
+
 -- Test constraint exclusion of UNION ALL subqueries
 explain (costs off)
  SELECT * FROM
-- 
2.43.0