From b53c42fcc9b3608623a22d4366a8472321e2ac39 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Mon, 20 Nov 2023 10:08:28 +0800
Subject: [PATCH v4] Propagate pathkeys from CTEs up to the outer query

---
 src/backend/optimizer/path/allpaths.c  | 17 ++++++++++++++++-
 src/backend/optimizer/plan/planner.c   |  1 +
 src/backend/optimizer/plan/subselect.c | 12 +++++++++---
 src/backend/optimizer/util/pathnode.c  |  5 +++--
 src/include/nodes/pathnodes.h          |  3 +++
 src/include/optimizer/pathnode.h       |  2 +-
 src/test/regress/expected/with.out     | 17 +++++++++++++++++
 src/test/regress/sql/with.sql          |  7 +++++++
 8 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 0b98f0856e..e87f37d219 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2873,6 +2873,8 @@ static void
 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 {
 	Plan	   *cteplan;
+	Path	   *ctepath;
+	List	   *pathkeys;
 	PlannerInfo *cteroot;
 	Index		levelsup;
 	int			ndx;
@@ -2918,6 +2920,19 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	/* Mark rel with estimated output rows, width, etc */
 	set_cte_size_estimates(root, rel, cteplan->plan_rows);
 
+	/*
+	 * Locate the best path previously made for the referenced CTE.  We need to
+	 * know its pathkeys.
+	 */
+	Assert(list_length(root->glob->subpaths) == list_length(root->glob->subplans));
+	ctepath = (Path *) list_nth(root->glob->subpaths, plan_id - 1);
+
+	/* Convert the pathkeys to outer representation */
+	pathkeys = convert_subquery_pathkeys(root,
+										 rel,
+										 ctepath->pathkeys,
+										 cteplan->targetlist);
+
 	/*
 	 * We don't support pushing join clauses into the quals of a CTE scan, but
 	 * it could still have required parameterization due to LATERAL refs in
@@ -2926,7 +2941,7 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	required_outer = rel->lateral_relids;
 
 	/* Generate appropriate path */
-	add_path(rel, create_ctescan_path(root, rel, required_outer));
+	add_path(rel, create_ctescan_path(root, rel, pathkeys, required_outer));
 }
 
 /*
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 6d08cc8cdd..3b60a8c5be 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -307,6 +307,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
 	glob->boundParams = boundParams;
 	glob->subplans = NIL;
 	glob->subroots = NIL;
+	glob->subpaths = NIL;
 	glob->rewindPlanIDs = NULL;
 	glob->finalrtable = NIL;
 	glob->finalrteperminfos = NIL;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d6954a7e86..27ad3fc2f8 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -539,10 +539,12 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
 	}
 
 	/*
-	 * Add the subplan and its PlannerInfo to the global lists.
+	 * Add the subplan and its PlannerInfo, as well as a dummy Path entry, to
+	 * the global lists.
 	 */
 	root->glob->subplans = lappend(root->glob->subplans, plan);
 	root->glob->subroots = lappend(root->glob->subroots, subroot);
+	root->glob->subpaths = lappend(root->glob->subpaths, NULL);
 	splan->plan_id = list_length(root->glob->subplans);
 
 	if (isInitPlan)
@@ -1029,10 +1031,12 @@ SS_process_ctes(PlannerInfo *root)
 		splan->setParam = list_make1_int(paramid);
 
 		/*
-		 * Add the subplan and its PlannerInfo to the global lists.
+		 * Add the subplan and its PlannerInfo, as well as the best path, to
+		 * the global lists.
 		 */
 		root->glob->subplans = lappend(root->glob->subplans, plan);
 		root->glob->subroots = lappend(root->glob->subroots, subroot);
+		root->glob->subpaths = lappend(root->glob->subpaths, best_path);
 		splan->plan_id = list_length(root->glob->subplans);
 
 		root->init_plans = lappend(root->init_plans, splan);
@@ -3021,10 +3025,12 @@ SS_make_initplan_from_plan(PlannerInfo *root,
 	SubPlan    *node;
 
 	/*
-	 * Add the subplan and its PlannerInfo to the global lists.
+	 * Add the subplan and its PlannerInfo, as well as a dummy Path entry, to
+	 * the global lists.
 	 */
 	root->glob->subplans = lappend(root->glob->subplans, plan);
 	root->glob->subroots = lappend(root->glob->subroots, subroot);
+	root->glob->subpaths = lappend(root->glob->subpaths, NULL);
 
 	/*
 	 * Create a SubPlan node and add it to the outer list of InitPlans. Note
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index c29ca5a0da..e4a5ae83e4 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2121,7 +2121,8 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
  *	  returning the pathnode.
  */
 Path *
-create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
+create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
+					List *pathkeys, Relids required_outer)
 {
 	Path	   *pathnode = makeNode(Path);
 
@@ -2133,7 +2134,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
 	pathnode->parallel_aware = false;
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
-	pathnode->pathkeys = NIL;	/* XXX for now, result is always unordered */
+	pathnode->pathkeys = pathkeys;
 
 	cost_ctescan(pathnode, root, rel, pathnode->param_info);
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 08dbee002f..6611b1e0aa 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -107,6 +107,9 @@ typedef struct PlannerGlobal
 	/* PlannerInfos for SubPlan nodes */
 	List	   *subroots pg_node_attr(read_write_ignore);
 
+	/* best Paths for SubPlan nodes */
+	List	   *subpaths;
+
 	/* indices of subplans that require REWIND */
 	Bitmapset  *rewindPlanIDs;
 
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 99c2f955aa..7cc481b8c2 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -115,7 +115,7 @@ extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
 extern Path *create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel,
 									   Relids required_outer);
 extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
-								 Relids required_outer);
+								 List *pathkeys, Relids required_outer);
 extern Path *create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel,
 											 Relids required_outer);
 extern Path *create_resultscan_path(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 7bef181d78..b4f3121751 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -672,6 +672,23 @@ select count(*) from tenk1 a
                Index Cond: (unique1 = x.unique1)
 (10 rows)
 
+-- test that pathkeys from a materialized CTE are propagated up to the
+-- outer query
+explain (costs off)
+with x as materialized (select unique1 from tenk1 b order by unique1)
+select count(*) from tenk1 a
+  where unique1 in (select * from x);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Aggregate
+   CTE x
+     ->  Index Only Scan using tenk1_unique1 on tenk1 b
+   ->  Merge Semi Join
+         Merge Cond: (a.unique1 = x.unique1)
+         ->  Index Only Scan using tenk1_unique1 on tenk1 a
+         ->  CTE Scan on x
+(7 rows)
+
 -- SEARCH clause
 create temp table graph0( f int, t int, label text );
 insert into graph0 values
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 037bc0a511..3fb0b33fce 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -359,6 +359,13 @@ with x as materialized (insert into tenk1 default values returning unique1)
 select count(*) from tenk1 a
   where unique1 in (select * from x);
 
+-- test that pathkeys from a materialized CTE are propagated up to the
+-- outer query
+explain (costs off)
+with x as materialized (select unique1 from tenk1 b order by unique1)
+select count(*) from tenk1 a
+  where unique1 in (select * from x);
+
 -- SEARCH clause
 
 create temp table graph0( f int, t int, label text );
-- 
2.31.0

