On Fri, Dec 15, 2023 at 5:22 AM Ashutosh Bapat
<ashutosh.bapat....@gmail.com> wrote:

> >
> > That looks pretty small considering the benefits. What do you think?
> >
> > [1] 
> > https://www.postgresql.org/message-id/caexhw5stmouobe55pmt83r8uxvfcph+pvo5dnpdrvcsbgxe...@mail.gmail.com
>
> If you want to experiment, please use attached patches. There's a fix
> for segfault during initdb in them. The patches are still raw.

First patch is no longer required. Here's rebased set

The patches are raw. make check has some crashes that I need to fix. I
am waiting to hear whether this is useful and whether the design is on
the right track.

-- 
Best Wishes,
Ashutosh Bapat
From 0a80eea476b696ad980a0a63858e2658a1b5b0de Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.ba...@enterprisedb.com>
Date: Mon, 17 Jul 2023 10:44:18 +0530
Subject: [PATCH 1/3] Basic infrastructure to link, unlink and free pathnodes

add_path and add_partial_path free path nodes which they deem sub-optimal. But
they just free the uppermost pathnode in the path. The subpaths continue to
occupy memory even if they are not used anywhere. The subpath nodes are not
freed because they may be referenced by other paths. This commit introduces the
infrastructure to track references to paths.

As the path nodes are referenced they are "linked" using link_path() to
referencing objects. When the path nodes are no longer useful they are
"unlinked" using unlink_path() from the referencing objects. The paths whose
references reach 0 during unlinking are freed automatically using
free_path(). The function unlinks the sub path nodes and can be called
when freeing any path node.

When the final path for join tree is chosen the paths linked to each
participating relation are unlinked, thus ultimately getting freed if not used.

TODO
The functions free_path(), unlink_path() and link_path() have ereports
to catch code paths which do not use these function correctly. They call
errbacktrace() which is not used anywhere else. These ereport calls will
need to be fixed for productization.
---
 src/backend/optimizer/path/allpaths.c |  77 ++++++++++++++
 src/backend/optimizer/util/pathnode.c | 142 ++++++++++++++++++++++++++
 src/include/nodes/pathnodes.h         |   1 +
 src/include/optimizer/pathnode.h      |  38 +++++++
 4 files changed, 258 insertions(+)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 84c4de488a..9c02f8adb5 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -3388,6 +3388,81 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
 	}
 }
 
+/*
+ * TODO: Need similar code to free paths in upper relations.
+ */
+static void
+free_unused_paths_from_rel(RelOptInfo *rel)
+{
+	ListCell   *lc_path;
+	int			cnt_part;
+
+	foreach(lc_path, rel->pathlist)
+	{
+		Path	   *path = (Path *) lfirst(lc_path);
+
+		/* Free the path if none references it. */
+		if (path->ref_count == 1)
+		{
+			/* TODO: use routine to unlink path from list */
+			rel->pathlist = foreach_delete_current(rel->pathlist, lc_path);
+			unlink_path(&path);
+		}
+	}
+
+	/* Do the same for partial pathlist. */
+	foreach(lc_path, rel->partial_pathlist)
+	{
+		Path	   *path = (Path *) lfirst(lc_path);
+
+		/* Free the path if none references it. */
+		if (path->ref_count == 1)
+		{
+			rel->partial_pathlist = foreach_delete_current(rel->partial_pathlist, lc_path);
+			unlink_path(&path);
+		}
+	}
+
+	/*
+	 * TODO: We can perform this in generate_partitionwise_paths as well since
+	 * by that time all the paths from partitions will be linked if used.
+	 */
+
+	/* Free unused paths from the partition relations */
+	for (cnt_part = 0; cnt_part < rel->nparts; cnt_part++)
+	{
+		free_unused_paths_from_rel(rel->part_rels[cnt_part]);
+	}
+}
+
+/*
+ * Free unused paths from all the relations created while building the join tree.
+ */
+static void
+free_unused_paths(PlannerInfo *root, int levels_needed)
+{
+	int			cnt;
+
+	for (cnt = levels_needed - 1; cnt >= 1; cnt--)
+	{
+		ListCell   *lc;
+
+		foreach(lc, root->join_rel_level[cnt])
+		{
+			free_unused_paths_from_rel(lfirst(lc));
+		}
+	}
+
+	for (cnt = 0; cnt < root->simple_rel_array_size; cnt++)
+	{
+		RelOptInfo *rel = root->simple_rel_array[cnt];
+
+		/* Skip empty slots */
+		if (rel != NULL)
+			free_unused_paths_from_rel(rel);
+	}
+}
+
 /*
  * standard_join_search
  *	  Find possible joinpaths for a query by successively finding ways
@@ -3490,6 +3565,8 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 		}
 	}
 
+	free_unused_paths(root, levels_needed);
+
 	/*
 	 * We should have a single rel at the final level.
 	 */
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 2e1ec41a54..084d48b66b 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -915,6 +915,148 @@ add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost,
 	return true;
 }
 
+void
+free_path(Path *path)
+{
+	/*
+	 * If the path is still referenced, freeing it would create a dangling
+	 * pointer.
+	 */
+	/* TODO: it could just be an Assert? */
+	if (path->ref_count != 0)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errbacktrace(),
+				 errmsg("path node %p of type %d has reference count %d, can not be freed",
+						path, path->pathtype, path->ref_count)));
+		return;
+	}
+
+	/*
+	 * A path referenced in the parent relation's pathlist can't be freed.
+	 * Ideally such a path should have ref_count >= 1. If this happens it
+	 * indicates that the path was not linked somewhere, and yet unlinked
+	 * (usually by free_path()).
+	 */
+	if (list_member_ptr(path->parent->pathlist, path))
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errbacktrace(),
+				 errmsg("path node %p of type %d has reference count %d but is linked in parent's pathlist, can not be freed",
+						path, path->pathtype, path->ref_count)));
+		return;
+	}
+
+	/* Decrement the reference counts of paths referenced by this one. */
+	switch (path->pathtype)
+	{
+		case T_SeqScan:
+		case T_IndexScan:
+		case T_IndexOnlyScan:
+			/* Simple paths reference no other path. */
+			break;
+
+		case T_MergeJoin:
+		case T_HashJoin:
+		case T_NestLoop:
+			{
+				JoinPath   *jpath = (JoinPath *) path;
+
+				unlink_path(&(jpath->outerjoinpath));
+				unlink_path(&(jpath->innerjoinpath));
+			}
+			break;
+
+		case T_Append:
+		case T_MergeAppend:
+			{
+				AppendPath *appath = (AppendPath *) path;
+				ListCell   *lc;
+
+				foreach(lc, appath->subpaths)
+				{
+					Path	   *subpath = lfirst(lc);
+
+					/* TODO use a routine to unlink path from list. */
+					appath->subpaths = foreach_delete_current(appath->subpaths, lc);
+					unlink_path(&subpath);
+				}
+			}
+			break;
+
+		case T_Gather:
+			{
+				GatherPath *gpath = (GatherPath *) path;
+
+				unlink_path(&(gpath->subpath));
+			}
+			break;
+
+		case T_GatherMerge:
+			{
+				GatherMergePath *gmpath = (GatherMergePath *) path;
+
+				unlink_path(&gmpath->subpath);
+			}
+			break;
+
+		case T_BitmapHeapScan:
+			{
+				BitmapHeapPath *bhpath = (BitmapHeapPath *) path;
+
+				unlink_path(&(bhpath->bitmapqual));
+			}
+			break;
+
+		case T_Material:
+			{
+				MaterialPath *mpath = (MaterialPath *) path;
+
+				unlink_path(&(mpath->subpath));
+			}
+			break;
+
+		case T_Memoize:
+			{
+				MemoizePath *mpath = (MemoizePath *) path;
+
+				unlink_path(&mpath->subpath);
+			}
+			break;
+
+		case T_Result:
+			{
+				/*
+				 * All Result paths except ProjectionPath are simple paths
+				 * without any subpath.
+				 */
+				if (IsA(path, ProjectionPath))
+				{
+					ProjectionPath *ppath = (ProjectionPath *) path;
+
+					unlink_path(&ppath->subpath);
+				}
+			}
+			break;
+
+		default:
+			ereport(WARNING,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					 errbacktrace(),
+					 errmsg("unrecognized path type %d", path->pathtype)));
+			break;
+	}
+
+	/*
+	 * TODO: add_path does not free IndexPaths, but we do that here. Is there
+	 * a hazard?
+	 */
+
+	/* Now reclaim the memory. */
+	pfree(path);
+}
 
 /*****************************************************************************
  *		PATH NODE CREATION ROUTINES
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 534692bee1..48a8176e19 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1643,6 +1643,7 @@ typedef struct Path
 
 	/* sort ordering of path's output; a List of PathKey nodes; see above */
 	List	   *pathkeys;
+	int			ref_count;
 } Path;
 
 /* Macro for extracting a path's parameterization relids; beware double eval */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index c43d97b48a..90bff53dda 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -16,6 +16,7 @@
 
 #include "nodes/bitmapset.h"
 #include "nodes/pathnodes.h"
+#include "limits.h"
 
 
 /*
@@ -298,6 +299,43 @@ extern Path *reparameterize_path(PlannerInfo *root, Path *path,
 								 double loop_count);
 extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path,
 										  RelOptInfo *child_rel);
+extern void free_path(Path *path);
+
+static inline void
+link_path(Path **path_link, Path *path)
+{
+	if (path->ref_count < 0)
+		ereport(WARNING,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errbacktrace(),
+				 errmsg("path node %p of type %d has negative reference count %d",
+						path, path->pathtype, path->ref_count)));
+
+	path->ref_count++;
+	*path_link = path;
+	Assert(path->ref_count > 0 && path->ref_count <= INT_MAX);
+}
+
+static inline void
+unlink_path(Path **path_link)
+{
+	Path	   *path = *path_link;
+
+	/* A path to be unlinked should have been linked before. */
+	if (path->ref_count < 0)
+		ereport(WARNING,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errbacktrace(),
+				 errmsg("path node %p of type %d had negative reference count %d",
+						path, path->pathtype, path->ref_count)));
+
+	path->ref_count--;
+	*path_link = NULL;
+
+	/* Free path if none is referencing it. */
+	if (path->ref_count == 0)
+		free_path(path);
+}
 
 /*
  * prototypes for relnode.c
-- 
2.25.1

From 6c031c4f7130ca2cb3cbc6a599e1b7f35bfd2703 Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.ba...@enterprisedb.com>
Date: Mon, 17 Jul 2023 10:57:30 +0530
Subject: [PATCH 2/3] Actual code to use pathnode referencing infrastructure

The commit uses the infrastructure built by the previous commit to references,
unreference and free paths.

The code is not completely mature. There are TODOs.
---
 src/backend/optimizer/util/pathnode.c | 124 +++++++++++++++-----------
 1 file changed, 72 insertions(+), 52 deletions(-)

diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 084d48b66b..37fc0b7a2b 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -588,10 +588,10 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
 														  p1);
 
 			/*
-			 * Delete the data pointed-to by the deleted cell, if possible
+			 * TODO: write a routine to unlink a path from the list node and
+			 * delete the list node
 			 */
-			if (!IsA(old_path, IndexPath))
-				pfree(old_path);
+			unlink_path(&old_path);
 		}
 		else
 		{
@@ -614,12 +614,12 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
 		/* Accept the new path: insert it at proper place in pathlist */
 		parent_rel->pathlist =
 			list_insert_nth(parent_rel->pathlist, insert_at, new_path);
+		new_path->ref_count++;
+		/* TODO: write a function to link_path in a List */
 	}
 	else
 	{
-		/* Reject and recycle the new path */
-		if (!IsA(new_path, IndexPath))
-			pfree(new_path);
+		free_path(new_path);
 	}
 }
 
@@ -822,7 +822,8 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
 		{
 			parent_rel->partial_pathlist =
 				foreach_delete_current(parent_rel->partial_pathlist, p1);
-			pfree(old_path);
+			/* TODO use routine to unlink a path from the linked list */
+			unlink_path(&old_path);
 		}
 		else
 		{
@@ -845,11 +846,13 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
 		/* Accept the new path: insert it at proper place */
 		parent_rel->partial_pathlist =
 			list_insert_nth(parent_rel->partial_pathlist, insert_at, new_path);
+		new_path->ref_count++;
+		/* TODO create a routine to link path in a list at a given place */
 	}
 	else
 	{
 		/* Reject and recycle the new path */
-		pfree(new_path);
+		free_path(new_path);
 	}
 }
 
@@ -1202,7 +1205,7 @@ create_bitmap_heap_path(PlannerInfo *root,
 	pathnode->path.parallel_workers = parallel_degree;
 	pathnode->path.pathkeys = NIL;	/* always unordered */
 
-	pathnode->bitmapqual = bitmapqual;
+	link_path(&(pathnode->bitmapqual), bitmapqual);
 
 	cost_bitmap_heap_scan(&pathnode->path, root, rel,
 						  pathnode->path.param_info,
@@ -1237,6 +1240,8 @@ create_bitmap_and_path(PlannerInfo *root,
 	{
 		Path	   *bitmapqual = (Path *) lfirst(lc);
 
+		/* TODO: find a better way to link a path in a linked list */
+		bitmapqual->ref_count++;
 		required_outer = bms_add_members(required_outer,
 										 PATH_REQ_OUTER(bitmapqual));
 	}
@@ -1289,6 +1294,8 @@ create_bitmap_or_path(PlannerInfo *root,
 	{
 		Path	   *bitmapqual = (Path *) lfirst(lc);
 
+		/* TODO: find a better way to link a path in a linked list */
+		bitmapqual->ref_count++;
 		required_outer = bms_add_members(required_outer,
 										 PATH_REQ_OUTER(bitmapqual));
 	}
@@ -1458,6 +1465,8 @@ create_append_path(PlannerInfo *root,
 	{
 		Path	   *subpath = (Path *) lfirst(l);
 
+		/* TODO: we should use the routine to link path to list */
+		subpath->ref_count++;
 		pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
 			subpath->parallel_safe;
 
@@ -1593,6 +1602,9 @@ create_merge_append_path(PlannerInfo *root,
 	{
 		Path	   *subpath = (Path *) lfirst(l);
 
+		/* TODO: use routine which links a path into a list */
+		subpath->ref_count++;
+
 		pathnode->path.rows += subpath->rows;
 		pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
 			subpath->parallel_safe;
@@ -1719,7 +1731,7 @@ create_material_path(RelOptInfo *rel, Path *subpath)
 	pathnode->path.parallel_workers = subpath->parallel_workers;
 	pathnode->path.pathkeys = subpath->pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	cost_material(&pathnode->path,
 				  subpath->startup_cost,
@@ -1753,7 +1765,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->path.parallel_workers = subpath->parallel_workers;
 	pathnode->path.pathkeys = subpath->pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 	pathnode->hash_operators = hash_operators;
 	pathnode->param_exprs = param_exprs;
 	pathnode->singlerow = singlerow;
@@ -1844,7 +1856,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	 */
 	pathnode->path.pathkeys = NIL;
 
-	pathnode->subpath = subpath;
+	link_path(&(pathnode->subpath), subpath);
 	pathnode->in_operators = sjinfo->semi_operators;
 	pathnode->uniq_exprs = sjinfo->semi_rhs_exprs;
 
@@ -1865,7 +1877,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		pathnode->path.total_cost = subpath->total_cost;
 		pathnode->path.pathkeys = subpath->pathkeys;
 
-		rel->cheapest_unique_path = (Path *) pathnode;
+		/* TODO: Do we need to make sure that cheapest_unique_path is NULL. */
+		link_path(&(rel->cheapest_unique_path), (Path *) pathnode);
 
 		MemoryContextSwitchTo(oldcontext);
 
@@ -1903,7 +1916,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 				pathnode->path.total_cost = subpath->total_cost;
 				pathnode->path.pathkeys = subpath->pathkeys;
 
-				rel->cheapest_unique_path = (Path *) pathnode;
+				link_path(&(rel->cheapest_unique_path), (Path *) pathnode);
 
 				MemoryContextSwitchTo(oldcontext);
 
@@ -1998,7 +2011,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		pathnode->path.total_cost = sort_path.total_cost;
 	}
 
-	rel->cheapest_unique_path = (Path *) pathnode;
+	link_path(&(rel->cheapest_unique_path), (Path *) pathnode);
 
 	MemoryContextSwitchTo(oldcontext);
 
@@ -2029,7 +2042,7 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 														  required_outer);
 	pathnode->path.parallel_aware = false;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 	pathnode->num_workers = subpath->parallel_workers;
 	pathnode->path.pathkeys = pathkeys;
 	pathnode->path.pathtarget = target ? target : rel->reltarget;
@@ -2120,7 +2133,7 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->path.parallel_workers = 0;
 	pathnode->path.pathkeys = NIL;	/* Gather has unordered result */
 
-	pathnode->subpath = subpath;
+	link_path(&(pathnode->subpath), subpath);
 	pathnode->num_workers = subpath->parallel_workers;
 	pathnode->single_copy = false;
 
@@ -2163,7 +2176,7 @@ create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		subpath->parallel_safe;
 	pathnode->path.parallel_workers = subpath->parallel_workers;
 	pathnode->path.pathkeys = pathkeys;
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	cost_subqueryscan(pathnode, root, rel, pathnode->path.param_info,
 					  trivial_pathtarget);
@@ -2392,7 +2405,7 @@ create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->path.total_cost = total_cost;
 	pathnode->path.pathkeys = pathkeys;
 
-	pathnode->fdw_outerpath = fdw_outerpath;
+	link_path(&pathnode->fdw_outerpath, fdw_outerpath);
 	pathnode->fdw_restrictinfo = fdw_restrictinfo;
 	pathnode->fdw_private = fdw_private;
 
@@ -2444,7 +2457,7 @@ create_foreign_join_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->path.total_cost = total_cost;
 	pathnode->path.pathkeys = pathkeys;
 
-	pathnode->fdw_outerpath = fdw_outerpath;
+	link_path(&pathnode->fdw_outerpath, fdw_outerpath);
 	pathnode->fdw_restrictinfo = fdw_restrictinfo;
 	pathnode->fdw_private = fdw_private;
 
@@ -2491,7 +2504,7 @@ create_foreign_upper_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->path.total_cost = total_cost;
 	pathnode->path.pathkeys = pathkeys;
 
-	pathnode->fdw_outerpath = fdw_outerpath;
+	link_path(&pathnode->fdw_outerpath, fdw_outerpath);
 	pathnode->fdw_restrictinfo = fdw_restrictinfo;
 	pathnode->fdw_private = fdw_private;
 
@@ -2644,8 +2657,8 @@ create_nestloop_path(PlannerInfo *root,
 	pathnode->jpath.path.pathkeys = pathkeys;
 	pathnode->jpath.jointype = jointype;
 	pathnode->jpath.inner_unique = extra->inner_unique;
-	pathnode->jpath.outerjoinpath = outer_path;
-	pathnode->jpath.innerjoinpath = inner_path;
+	link_path(&(pathnode->jpath.outerjoinpath), outer_path);
+	link_path(&(pathnode->jpath.innerjoinpath), inner_path);
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
 
 	final_cost_nestloop(root, pathnode, workspace, extra);
@@ -2708,8 +2721,8 @@ create_mergejoin_path(PlannerInfo *root,
 	pathnode->jpath.path.pathkeys = pathkeys;
 	pathnode->jpath.jointype = jointype;
 	pathnode->jpath.inner_unique = extra->inner_unique;
-	pathnode->jpath.outerjoinpath = outer_path;
-	pathnode->jpath.innerjoinpath = inner_path;
+	link_path(&(pathnode->jpath.outerjoinpath), outer_path);
+	link_path(&(pathnode->jpath.innerjoinpath), inner_path);
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
 	pathnode->path_mergeclauses = mergeclauses;
 	pathnode->outersortkeys = outersortkeys;
@@ -2785,8 +2798,8 @@ create_hashjoin_path(PlannerInfo *root,
 	pathnode->jpath.path.pathkeys = NIL;
 	pathnode->jpath.jointype = jointype;
 	pathnode->jpath.inner_unique = extra->inner_unique;
-	pathnode->jpath.outerjoinpath = outer_path;
-	pathnode->jpath.innerjoinpath = inner_path;
+	link_path(&(pathnode->jpath.outerjoinpath), outer_path);
+	link_path(&(pathnode->jpath.innerjoinpath), inner_path);
 	pathnode->jpath.joinrestrictinfo = restrict_clauses;
 	pathnode->path_hashclauses = hashclauses;
 	/* final_cost_hashjoin will fill in pathnode->num_batches */
@@ -2827,6 +2840,9 @@ create_projection_path(PlannerInfo *root,
 		Assert(subpp->path.parent == rel);
 		subpath = subpp->subpath;
 		Assert(!IsA(subpath, ProjectionPath));
+
+		/* Free it if not used anywhere else. */
+		unlink_path((Path **) &subpp);
 	}
 
 	pathnode->path.pathtype = T_Result;
@@ -2842,7 +2858,7 @@ create_projection_path(PlannerInfo *root,
 	/* Projection does not change the sort order */
 	pathnode->path.pathkeys = subpath->pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	/*
 	 * We might not need a separate Result node.  If the input plan node type
@@ -2961,21 +2977,21 @@ apply_projection_to_path(PlannerInfo *root,
 		{
 			GatherPath *gpath = (GatherPath *) path;
 
-			gpath->subpath = (Path *)
-				create_projection_path(root,
-									   gpath->subpath->parent,
-									   gpath->subpath,
-									   target);
+			link_path(&gpath->subpath,
+					  (Path *) create_projection_path(root,
+													  gpath->subpath->parent,
+													  gpath->subpath,
+													  target));
 		}
 		else
 		{
 			GatherMergePath *gmpath = (GatherMergePath *) path;
 
-			gmpath->subpath = (Path *)
-				create_projection_path(root,
-									   gmpath->subpath->parent,
-									   gmpath->subpath,
-									   target);
+			link_path(&gmpath->subpath,
+					  (Path *) create_projection_path(root,
+													  gmpath->subpath->parent,
+													  gmpath->subpath,
+													  target));
 		}
 	}
 	else if (path->parallel_safe &&
@@ -3024,7 +3040,7 @@ create_set_projection_path(PlannerInfo *root,
 	/* Projection does not change the sort order XXX? */
 	pathnode->path.pathkeys = subpath->pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	/*
 	 * Estimate number of rows produced by SRFs for each row of input; if
@@ -3093,7 +3109,7 @@ create_incremental_sort_path(PlannerInfo *root,
 	pathnode->path.parallel_workers = subpath->parallel_workers;
 	pathnode->path.pathkeys = pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	cost_incremental_sort(&pathnode->path,
 						  root, pathkeys, presorted_keys,
@@ -3140,7 +3156,7 @@ create_sort_path(PlannerInfo *root,
 	pathnode->path.parallel_workers = subpath->parallel_workers;
 	pathnode->path.pathkeys = pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	cost_sort(&pathnode->path, root, pathkeys,
 			  subpath->total_cost,
@@ -3186,7 +3202,7 @@ create_group_path(PlannerInfo *root,
 	/* Group doesn't change sort ordering */
 	pathnode->path.pathkeys = subpath->pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	pathnode->groupClause = groupClause;
 	pathnode->qual = qual;
@@ -3244,7 +3260,7 @@ create_upper_unique_path(PlannerInfo *root,
 	/* Unique doesn't change the input ordering */
 	pathnode->path.pathkeys = subpath->pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 	pathnode->numkeys = numCols;
 
 	/*
@@ -3317,7 +3333,7 @@ create_agg_path(PlannerInfo *root,
 	else
 		pathnode->path.pathkeys = NIL;	/* output is unordered */
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	pathnode->aggstrategy = aggstrategy;
 	pathnode->aggsplit = aggsplit;
@@ -3380,7 +3396,7 @@ create_groupingsets_path(PlannerInfo *root,
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
 	pathnode->path.parallel_workers = subpath->parallel_workers;
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 
 	/*
 	 * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
@@ -3630,7 +3646,7 @@ create_windowagg_path(PlannerInfo *root,
 	/* WindowAgg preserves the input sort order */
 	pathnode->path.pathkeys = subpath->pathkeys;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 	pathnode->winclause = winclause;
 	pathnode->qual = qual;
 	pathnode->topwindow = topwindow;
@@ -3699,7 +3715,7 @@ create_setop_path(PlannerInfo *root,
 	pathnode->path.pathkeys =
 		(strategy == SETOP_SORTED) ? subpath->pathkeys : NIL;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 	pathnode->cmd = cmd;
 	pathnode->strategy = strategy;
 	pathnode->distinctList = distinctList;
@@ -3758,8 +3774,8 @@ create_recursiveunion_path(PlannerInfo *root,
 	/* RecursiveUnion result is always unsorted */
 	pathnode->path.pathkeys = NIL;
 
-	pathnode->leftpath = leftpath;
-	pathnode->rightpath = rightpath;
+	link_path(&pathnode->leftpath, leftpath);
+	link_path(&pathnode->rightpath, rightpath);
 	pathnode->distinctList = distinctList;
 	pathnode->wtParam = wtParam;
 	pathnode->numGroups = numGroups;
@@ -3801,7 +3817,7 @@ create_lockrows_path(PlannerInfo *root, RelOptInfo *rel,
 	 */
 	pathnode->path.pathkeys = NIL;
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 	pathnode->rowMarks = rowMarks;
 	pathnode->epqParam = epqParam;
 
@@ -3904,7 +3920,7 @@ create_modifytable_path(PlannerInfo *root, RelOptInfo *rel,
 		pathnode->path.pathtarget->width = 0;
 	}
 
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 	pathnode->operation = operation;
 	pathnode->canSetTag = canSetTag;
 	pathnode->nominalRelation = nominalRelation;
@@ -3962,7 +3978,7 @@ create_limit_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->path.startup_cost = subpath->startup_cost;
 	pathnode->path.total_cost = subpath->total_cost;
 	pathnode->path.pathkeys = subpath->pathkeys;
-	pathnode->subpath = subpath;
+	link_path(&pathnode->subpath, subpath);
 	pathnode->limitOffset = limitOffset;
 	pathnode->limitCount = limitCount;
 	pathnode->limitOption = limitOption;
@@ -4243,6 +4259,7 @@ do { \
 	(path) = reparameterize_path_by_child(root, (path), child_rel); \
 	if ((path) == NULL) \
 		return NULL; \
+	link_path(&(path), (path)); \
 } while(0)
 
 #define REPARAMETERIZE_CHILD_PATH_LIST(pathlist) \
@@ -4534,11 +4551,14 @@ reparameterize_pathlist_by_child(PlannerInfo *root,
 
 		if (path == NULL)
 		{
+			/* TODO: unlink paths in the list */
 			list_free(result);
 			return NIL;
 		}
 
+		/* TODO: need a routine to link a path into a linked list */
 		result = lappend(result, path);
+		path->ref_count++;
 	}
 
 	return result;
-- 
2.25.1

From 97ba1883e3df294294ea652f684b3503de63f247 Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.ba...@enterprisedb.com>
Date: Mon, 17 Jul 2023 15:10:45 +0530
Subject: [PATCH 3/3] Local variables pointing to path node used repeatedly

In match_unsorted_outer() we create a materialize path for inner
relation and pass it to try_nestloop_path repeatedly. Link and unlink
the path to and from the local variable to keep track of it.
---
 src/backend/optimizer/path/joinpath.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 6aca66f196..36dffb0af6 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -1801,12 +1801,14 @@ match_unsorted_outer(PlannerInfo *root,
 		/*
 		 * Consider materializing the cheapest inner path, unless
 		 * enable_material is off or the path in question materializes its
-		 * output anyway.
+		 * output anyway. Link the path to a local reference so that it won't
+		 * be freed by add_path if the surrounding nest path is freed by
+		 * add_path. It will get freed if not used later.
 		 */
 		if (enable_material && inner_cheapest_total != NULL &&
 			!ExecMaterializesOutput(inner_cheapest_total->pathtype))
-			matpath = (Path *)
-				create_material_path(innerrel, inner_cheapest_total);
+			link_path(&matpath,
+					  (Path *) create_material_path(innerrel, inner_cheapest_total));
 	}
 
 	foreach(lc1, outerrel->pathlist)
@@ -1922,6 +1924,10 @@ match_unsorted_outer(PlannerInfo *root,
 								 false);
 	}
 
+	/* Materialized inner path won't be used anymore. Unlink it */
+	if (matpath)
+		unlink_path(&matpath);
+
 	/*
 	 * Consider partial nestloop and mergejoin plan if outerrel has any
 	 * partial path and the joinrel is parallel-safe.  However, we can't
-- 
2.25.1

Reply via email to