On 02.04.2025 00:06, David Rowley wrote:
I tried to move things along to address Tom's concern about not
wanting to show this in EXPLAIN's standard output. I suggested in [1]
that we could use EXPLAIN ANALYZE, but nobody commented on that.
EXPLAIN ANALYZE is much more verbose than EXPLAIN already, and if we
put these in EXPLAIN ANALYZE then the viewer can more easily compare
planned vs actual. I had mentioned that Hash Join does something like
this for buckets.

David

[1]https://postgr.es/m/CAApHDvqPkWmz1Lq23c9CD+mAW=hgprd289tc30f3h1f6ng5...@mail.gmail.com


Apologies for not addressing your earlier suggestion properly. After reconsidering, I agree that showing ndistinct and est_entries in EXPLAIN ANALYZE when there are actual cache misses is the best approach. This is typically when users notice performance regressions and start investigating the cause, so surfacing planner expectations at that point can be the most useful. I attached v6 patch with changes.

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.
From 36ed866e3d28a709fd1133a89ecdc7a2654a657f Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Wed, 2 Apr 2025 10:49:27 +0300
Subject: [PATCH v6] Display estimated distinct keys and cache capacity in
 EXPLAIN ANALYZE for Memoize when cache misses occur

---
 src/backend/commands/explain.c          | 13 +++++++++++++
 src/backend/optimizer/path/costsize.c   |  3 +++
 src/backend/optimizer/plan/createplan.c | 10 +++++++---
 src/backend/optimizer/util/pathnode.c   |  3 +++
 src/include/nodes/pathnodes.h           |  2 ++
 src/include/nodes/plannodes.h           |  6 ++++++
 6 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index ef8aa489af8..f21de58e0a0 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3635,6 +3635,19 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
 
 	if (mstate->stats.cache_misses > 0)
 	{
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "Estimated Capacity: %u  Estimated Distinct Lookup Keys: %0.0f\n",
+							((Memoize *) plan)->est_entries,
+							((Memoize *) plan)->est_unique_keys);
+		}
+		else
+		{
+			ExplainPropertyUInteger("Estimated Capacity", "", ((Memoize *) plan)->est_entries, es);
+			ExplainPropertyFloat("Estimated Distinct Lookup Keys", "", ((Memoize *) plan)->est_unique_keys, 0, es);
+		}
+
 		/*
 		 * mem_peak is only set when we freed memory, so we must use mem_used
 		 * when mem_peak is 0.
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index f6f77b8fe19..e3abf9ccc26 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2604,6 +2604,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
 							 PG_UINT32_MAX);
 
+	/* Remember ndistinct for a potential EXPLAIN later */
+	mpath->est_unique_keys = ndistinct;
+
 	/*
 	 * When the number of distinct parameter values is above the amount we can
 	 * store in the cache, then we'll have to evict some entries from the
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 75e2b0b9036..38882501484 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -284,7 +284,8 @@ static Material *make_material(Plan *lefttree);
 static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
 							 Oid *collations, List *param_exprs,
 							 bool singlerow, bool binary_mode,
-							 uint32 est_entries, Bitmapset *keyparamids);
+							 uint32 est_entries, double est_unique_keys,
+							 Bitmapset *keyparamids);
 static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
 								 int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
 								 int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1703,7 +1704,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
 
 	plan = make_memoize(subplan, operators, collations, param_exprs,
 						best_path->singlerow, best_path->binary_mode,
-						best_path->est_entries, keyparamids);
+						best_path->est_entries, best_path->est_unique_keys,
+						keyparamids);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -6636,7 +6638,8 @@ materialize_finished_plan(Plan *subplan)
 static Memoize *
 make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 			 List *param_exprs, bool singlerow, bool binary_mode,
-			 uint32 est_entries, Bitmapset *keyparamids)
+			uint32 est_entries, double est_unique_keys,
+			Bitmapset *keyparamids)
 {
 	Memoize    *node = makeNode(Memoize);
 	Plan	   *plan = &node->plan;
@@ -6654,6 +6657,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 	node->binary_mode = binary_mode;
 	node->est_entries = est_entries;
 	node->keyparamids = keyparamids;
+	node->est_unique_keys = est_unique_keys;
 
 	return node;
 }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..1fbcda99067 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1701,6 +1701,9 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	Assert(enable_memoize);
 	pathnode->path.disabled_nodes = subpath->disabled_nodes;
 
+	/* Estimated number of distinct memoization keys, computed using estimate_num_groups() */
+	pathnode->est_unique_keys = 0;
+
 	/*
 	 * Add a small additional charge for caching the first entry.  All the
 	 * harder calculations for rescans are performed in cost_memoize_rescan().
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c24a1fc8514..0946ccea994 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2040,6 +2040,8 @@ typedef struct MemoizePath
 	uint32		est_entries;	/* The maximum number of entries that the
 								 * planner expects will fit in the cache, or 0
 								 * if unknown */
+	double		est_unique_keys;		/* Estimated number of distinct memoization keys,
+								 * used for cache size evaluation. Kept for EXPLAIN */
 } MemoizePath;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 658d76225e4..3d9d3a1159d 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1063,6 +1063,12 @@ typedef struct Memoize
 
 	/* paramids from param_exprs */
 	Bitmapset  *keyparamids;
+
+	/*
+	 * Estimated number of distinct memoization keys,
+	 * used for cache size evaluation. Kept for EXPLAIN
+	 */
+	double		est_unique_keys;
 } Memoize;
 
 /* ----------------
-- 
2.34.1

Reply via email to