From 2c35bf06afaf01b09cdc65306b672aea7b5ee4d5 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 23 Apr 2025 10:29:15 +0900
Subject: [PATCH v6 1/3] Expand virtual generated columns before sublink
 pull-up

Currently, we expand virtual generated columns after we have pulled up
any SubLinks within the query's quals.  This ensures that the virtual
generated column references within SubLinks that should be transformed
into joins are correctly expanded.  This approach works well and has
posed no issues.

In an upcoming patch, we plan to centralize the collection of catalog
information needed early in the planner.  This will help avoid
repeated table_open/table_close calls for relations in the rangetable.
Since this information is required during sublink pull-up, we are
moving the expansion of virtual generated columns to occur beforehand.

To achieve this, if any EXISTS SubLinks can be pulled up, their
rangetables are processed just before pulling them up.
---
 src/backend/optimizer/plan/planner.c          | 17 +++++++-------
 src/backend/optimizer/plan/subselect.c        | 16 ++++++++++++++
 src/backend/optimizer/prep/prepjointree.c     | 20 +++++++----------
 src/include/optimizer/prep.h                  |  2 +-
 .../regress/expected/generated_virtual.out    | 22 +++++++++++++++++++
 src/test/regress/sql/generated_virtual.sql    |  9 ++++++++
 6 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 549aedcfa99..fbbc42f1600 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -720,6 +720,15 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
 	 */
 	transform_MERGE_to_join(parse);
 
+	/*
+	 * Scan the rangetable for relations with virtual generated columns, and
+	 * replace all Var nodes in the query that reference these columns with
+	 * the generation expressions.  Note that this step does not descend into
+	 * sublinks and subqueries; if we pull up any sublinks or subqueries
+	 * below, their rangetables are processed just before pulling them up.
+	 */
+	parse = root->parse = expand_virtual_generated_columns(root);
+
 	/*
 	 * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
 	 * that we don't need so many special cases to deal with that situation.
@@ -743,14 +752,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
 	 */
 	preprocess_function_rtes(root);
 
-	/*
-	 * Scan the rangetable for relations with virtual generated columns, and
-	 * replace all Var nodes in the query that reference these columns with
-	 * the generation expressions.  Recursion issues here are handled in the
-	 * same way as for SubLinks.
-	 */
-	parse = root->parse = expand_virtual_generated_columns(root);
-
 	/*
 	 * Check to see if any subqueries in the jointree can be merged into this
 	 * query.
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index e7cb3fede66..89e6873da08 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1458,6 +1458,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	int			varno;
 	Relids		clause_varnos;
 	Relids		upper_varnos;
+	PlannerInfo subroot;
 
 	Assert(sublink->subLinkType == EXISTS_SUBLINK);
 
@@ -1487,6 +1488,21 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
 	if (!simplify_EXISTS_query(root, subselect))
 		return NULL;
 
+	/*
+	 * Scan the rangetable for relations with virtual generated columns, and
+	 * replace all Var nodes in the subquery that reference these columns with
+	 * the generation expressions.
+	 *
+	 * Note: we construct up an entirely dummy PlannerInfo for use here.  This
+	 * is fine because only the "glob" and "parse" links will be used in this
+	 * case.
+	 */
+	MemSet(&subroot, 0, sizeof(subroot));
+	subroot.type = T_PlannerInfo;
+	subroot.glob = root->glob;
+	subroot.parse = subselect;
+	subselect = expand_virtual_generated_columns(&subroot);
+
 	/*
 	 * Separate out the WHERE clause.  (We could theoretically also remove
 	 * top-level plain JOIN/ON clauses, but it's probably not worth the
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 87dc6f56b57..8140d22de70 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -4,10 +4,10 @@
  *	  Planner preprocessing for subqueries and join tree manipulation.
  *
  * NOTE: the intended sequence for invoking these operations is
+ *		expand_virtual_generated_columns
  *		replace_empty_jointree
  *		pull_up_sublinks
  *		preprocess_function_rtes
- *		expand_virtual_generated_columns
  *		pull_up_subqueries
  *		flatten_simple_union_all
  *		do expression preprocessing (including flattening JOIN alias vars)
@@ -958,10 +958,6 @@ preprocess_function_rtes(PlannerInfo *root)
  * generation expressions.  Note that we do not descend into subqueries; that
  * is taken care of when the subqueries are planned.
  *
- * This has to be done after we have pulled up any SubLinks within the query's
- * quals; otherwise any virtual generated column references within the SubLinks
- * that should be transformed into joins wouldn't get expanded.
- *
  * Returns a modified copy of the query tree, if any relations with virtual
  * generated columns are present.
  */
@@ -1333,6 +1329,13 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	/* No CTEs to worry about */
 	Assert(subquery->cteList == NIL);
 
+	/*
+	 * Scan the rangetable for relations with virtual generated columns, and
+	 * replace all Var nodes in the subquery that reference these columns with
+	 * the generation expressions.
+	 */
+	subquery = subroot->parse = expand_virtual_generated_columns(subroot);
+
 	/*
 	 * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
 	 * that we don't need so many special cases to deal with that situation.
@@ -1352,13 +1355,6 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	 */
 	preprocess_function_rtes(subroot);
 
-	/*
-	 * Scan the rangetable for relations with virtual generated columns, and
-	 * replace all Var nodes in the query that reference these columns with
-	 * the generation expressions.
-	 */
-	subquery = subroot->parse = expand_virtual_generated_columns(subroot);
-
 	/*
 	 * Recursively pull up the subquery's subqueries, so that
 	 * pull_up_subqueries' processing is complete for its jointree and
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
index df56202777c..ceb731bcf5e 100644
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -22,10 +22,10 @@
  * prototypes for prepjointree.c
  */
 extern void transform_MERGE_to_join(Query *parse);
+extern Query *expand_virtual_generated_columns(PlannerInfo *root);
 extern void replace_empty_jointree(Query *parse);
 extern void pull_up_sublinks(PlannerInfo *root);
 extern void preprocess_function_rtes(PlannerInfo *root);
-extern Query *expand_virtual_generated_columns(PlannerInfo *root);
 extern void pull_up_subqueries(PlannerInfo *root);
 extern void flatten_simple_union_all(PlannerInfo *root);
 extern void reduce_outer_joins(PlannerInfo *root);
diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out
index df704b5166f..9ecf73de814 100644
--- a/src/test/regress/expected/generated_virtual.out
+++ b/src/test/regress/expected/generated_virtual.out
@@ -1604,4 +1604,26 @@ select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
 
 -- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded
 alter table gtest32 alter column e type bigint using b;
+-- Ensure that virtual generated column references within SubLinks that should
+-- be transformed into joins can get expanded
+explain (costs off)
+select 1 from gtest32 t1 where exists
+  (select 1 from gtest32 t2 where t1.a > t2.a and t2.b = 2);
+             QUERY PLAN              
+-------------------------------------
+ Nested Loop Semi Join
+   Join Filter: (t1.a > t2.a)
+   ->  Seq Scan on gtest32 t1
+   ->  Materialize
+         ->  Seq Scan on gtest32 t2
+               Filter: ((a * 2) = 2)
+(6 rows)
+
+select 1 from gtest32 t1 where exists
+  (select 1 from gtest32 t2 where t1.a > t2.a and t2.b = 2);
+ ?column? 
+----------
+        1
+(1 row)
+
 drop table gtest32;
diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql
index 6fa986515b9..fb0d88cf21a 100644
--- a/src/test/regress/sql/generated_virtual.sql
+++ b/src/test/regress/sql/generated_virtual.sql
@@ -845,4 +845,13 @@ select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20;
 -- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded
 alter table gtest32 alter column e type bigint using b;
 
+-- Ensure that virtual generated column references within SubLinks that should
+-- be transformed into joins can get expanded
+explain (costs off)
+select 1 from gtest32 t1 where exists
+  (select 1 from gtest32 t2 where t1.a > t2.a and t2.b = 2);
+
+select 1 from gtest32 t1 where exists
+  (select 1 from gtest32 t2 where t1.a > t2.a and t2.b = 2);
+
 drop table gtest32;
-- 
2.43.0

