On 06.07.2023 11:27, Lukas Fittl wrote:
On Thu, Jul 6, 2023 at 12:56 AM Daniel Gustafsson <dan...@yesql.se> wrote:

    Lukas: do you have an updated patch for this commitfest to address
    David's
    comments?


I have a draft - I should be able to post an updated patch in the next days. Thanks for checking!

Thanks,
Lukas

--
Lukas Fittl


Hi hackers,

While debugging a query execution plan involving Memoize, it'd be nice to determine how many unique keys would fit into the cache. The est_entries value provides some insight, but without knowing ndistinct, it is unclear whether the cache is large enough to hold all unique keys or if some will be evicted.

Given its potential usefulness, I would like to work for this. I attached v2 patch with changes.

Example from memoize.sql

EXPLAIN SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
INNER JOIN tenk1 t2 ON t1.unique1 = t2.thousand
WHERE t2.unique1 < 1200;
                                             QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Aggregate  (cost=815.12..815.13 rows=1 width=40)
   ->  Nested Loop  (cost=0.30..809.12 rows=1200 width=4)
         ->  Seq Scan on tenk1 t2  (cost=0.00..470.00 rows=1200 width=4)
               Filter: (unique1 < 1200)
         ->  Memoize  (cost=0.30..0.41 rows=1 width=4)
               Cache Key: t2.thousand
               Cache Mode: logical
               Cache Estimated Entries: 655
               Cache Estimated NDistinct: 721
               ->  Index Only Scan using tenk1_unique1 on tenk1 t1  (cost=0.29..0.40 rows=1 width=4)
                     Index Cond: (unique1 = t2.thousand)
(11 rows)

Additionally, since this information would only be shown in EXPLAIN when costs are enabled, it should not cause any performance regression in normal execution. However, reviewers should be especially careful when verifying test outputs, as this change could affect plan details in regression tests.

Any thoughts?

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.
From 2e7d9bd9bfe58825fbdbb0100b9bd19d5bb7e53b Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Thu, 20 Mar 2025 11:45:02 +0300
Subject: [PATCH v2] Show ndistinct and est_entries in EXPLAIN for Memoize

---
 src/backend/commands/explain.c          | 12 ++++++++++++
 src/backend/optimizer/path/costsize.c   |  3 +++
 src/backend/optimizer/plan/createplan.c | 10 +++++++---
 src/backend/optimizer/util/pathnode.c   |  3 +++
 src/include/nodes/pathnodes.h           |  2 ++
 src/include/nodes/plannodes.h           |  6 ++++++
 6 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 33a16d2d8e2..95110e63c7d 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3629,6 +3629,11 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
 	{
 		ExplainPropertyText("Cache Key", keystr.data, es);
 		ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
+		if (es->costs)
+		{
+			ExplainPropertyFloat("Cache Estimated Entries", "", ((Memoize *) plan)->est_entries, 0, es);
+			ExplainPropertyFloat("Cache Estimated NDistinct", "", ((Memoize *) plan)->ndistinct, 0, es);
+		}
 	}
 	else
 	{
@@ -3636,6 +3641,13 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
 		appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
 		ExplainIndentText(es);
 		appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical");
+		if (es->costs)
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "Cache Estimated Entries: %d\n", ((Memoize *) plan)->est_entries);
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "Cache Estimated NDistinct: %0.0f\n", ((Memoize *) plan)->ndistinct);
+		}
 	}
 
 	pfree(keystr.data);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 256568d05a2..9c0738da4fe 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2604,6 +2604,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
 							 PG_UINT32_MAX);
 
+	/* Remember ndistinct for a potential EXPLAIN later */
+	mpath->ndistinct = ndistinct;
+
 	/*
 	 * When the number of distinct parameter values is above the amount we can
 	 * store in the cache, then we'll have to evict some entries from the
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 75e2b0b9036..79795a2b5f2 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -284,7 +284,8 @@ static Material *make_material(Plan *lefttree);
 static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
 							 Oid *collations, List *param_exprs,
 							 bool singlerow, bool binary_mode,
-							 uint32 est_entries, Bitmapset *keyparamids);
+							uint32 est_entries, Bitmapset *keyparamids,
+							double ndistinct);
 static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
 								 int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
 								 int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1703,7 +1704,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
 
 	plan = make_memoize(subplan, operators, collations, param_exprs,
 						best_path->singlerow, best_path->binary_mode,
-						best_path->est_entries, keyparamids);
+						best_path->est_entries, keyparamids,
+						best_path->ndistinct);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -6636,7 +6638,8 @@ materialize_finished_plan(Plan *subplan)
 static Memoize *
 make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 			 List *param_exprs, bool singlerow, bool binary_mode,
-			 uint32 est_entries, Bitmapset *keyparamids)
+			uint32 est_entries, Bitmapset *keyparamids,
+			double ndistinct)
 {
 	Memoize    *node = makeNode(Memoize);
 	Plan	   *plan = &node->plan;
@@ -6654,6 +6657,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 	node->binary_mode = binary_mode;
 	node->est_entries = est_entries;
 	node->keyparamids = keyparamids;
+	node->ndistinct = ndistinct;
 
 	return node;
 }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..1676e20879a 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1701,6 +1701,9 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	Assert(enable_memoize);
 	pathnode->path.disabled_nodes = subpath->disabled_nodes;
 
+	/* Estimated number of distinct memoization keys, computed using estimate_num_groups() */
+	pathnode->ndistinct = 0;
+
 	/*
 	 * Add a small additional charge for caching the first entry.  All the
 	 * harder calculations for rescans are performed in cost_memoize_rescan().
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c24a1fc8514..e99dbee2b3f 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2040,6 +2040,8 @@ typedef struct MemoizePath
 	uint32		est_entries;	/* The maximum number of entries that the
 								 * planner expects will fit in the cache, or 0
 								 * if unknown */
+	double		ndistinct;		/* Estimated number of distinct memoization keys,
+								 * used for cache size evaluation. Kept for EXPLAIN */
 } MemoizePath;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index f78bffd90cf..16b307a2141 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1060,6 +1060,12 @@ typedef struct Memoize
 
 	/* paramids from param_exprs */
 	Bitmapset  *keyparamids;
+
+	/*
+	 * Estimated number of distinct memoization keys,
+	 * used for cache size evaluation. Kept for EXPLAIN
+	 */
+	double		ndistinct;
 } Memoize;
 
 /* ----------------
-- 
2.34.1

Reply via email to