After considering the opinions expressed in this discussion, I tend to
agree more with David. If we add info about estimated unique keys and
estimated capacity, then any additional information - such as
evict_ratio and hit_ratio - can also be calculated, as EXPLAIN ANALYZE
provides all the necessary details to compute these values.
For now, I’m attaching a rebased v4 patch, which includes the fix for
ExplainPropertyText.
--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.
From b60e508583b1cb7f30814cef6f39c4b570693350 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Fri, 21 Mar 2025 11:52:29 +0300
Subject: [PATCH v4] Show ndistinct and est_entries in EXPLAIN for Memoize
---
src/backend/commands/explain.c | 16 ++++++++++++++++
src/backend/optimizer/path/costsize.c | 3 +++
src/backend/optimizer/plan/createplan.c | 10 +++++++---
src/backend/optimizer/util/pathnode.c | 3 +++
src/include/nodes/pathnodes.h | 2 ++
src/include/nodes/plannodes.h | 6 ++++++
6 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 391b34a2af2..cb7e295ca8a 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3628,6 +3628,22 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
ExplainPropertyText("Cache Key", keystr.data, es);
ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
+ if (es->costs)
+ {
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "Estimated Capacity: %u Estimated Distinct Lookup Keys: %0.0f\n",
+ ((Memoize *) plan)->est_entries,
+ ((Memoize *) plan)->est_unique_keys);
+ }
+ else
+ {
+ ExplainPropertyUInteger("Estimated Capacity", "", ((Memoize *) plan)->est_entries, es);
+ ExplainPropertyFloat("Estimated Distinct Lookup Keys", "", ((Memoize *) plan)->est_unique_keys, 0, es);
+ }
+ }
+
pfree(keystr.data);
if (!es->analyze)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index f6f77b8fe19..e3abf9ccc26 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2604,6 +2604,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
PG_UINT32_MAX);
+ /* Remember ndistinct for a potential EXPLAIN later */
+ mpath->est_unique_keys = ndistinct;
+
/*
* When the number of distinct parameter values is above the amount we can
* store in the cache, then we'll have to evict some entries from the
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 75e2b0b9036..596b82c5dc9 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -284,7 +284,8 @@ static Material *make_material(Plan *lefttree);
static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
Oid *collations, List *param_exprs,
bool singlerow, bool binary_mode,
- uint32 est_entries, Bitmapset *keyparamids);
+ uint32 est_entries, Bitmapset *keyparamids,
+ double est_unique_keys);
static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1703,7 +1704,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
plan = make_memoize(subplan, operators, collations, param_exprs,
best_path->singlerow, best_path->binary_mode,
- best_path->est_entries, keyparamids);
+ best_path->est_entries, keyparamids,
+ best_path->est_unique_keys);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -6636,7 +6638,8 @@ materialize_finished_plan(Plan *subplan)
static Memoize *
make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
List *param_exprs, bool singlerow, bool binary_mode,
- uint32 est_entries, Bitmapset *keyparamids)
+ uint32 est_entries, Bitmapset *keyparamids,
+ double est_unique_keys)
{
Memoize *node = makeNode(Memoize);
Plan *plan = &node->plan;
@@ -6654,6 +6657,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
node->binary_mode = binary_mode;
node->est_entries = est_entries;
node->keyparamids = keyparamids;
+ node->est_unique_keys = est_unique_keys;
return node;
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..1fbcda99067 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1701,6 +1701,9 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
Assert(enable_memoize);
pathnode->path.disabled_nodes = subpath->disabled_nodes;
+ /* Estimated number of distinct memoization keys, computed using estimate_num_groups() */
+ pathnode->est_unique_keys = 0;
+
/*
* Add a small additional charge for caching the first entry. All the
* harder calculations for rescans are performed in cost_memoize_rescan().
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c24a1fc8514..0946ccea994 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2040,6 +2040,8 @@ typedef struct MemoizePath
uint32 est_entries; /* The maximum number of entries that the
* planner expects will fit in the cache, or 0
* if unknown */
+ double est_unique_keys; /* Estimated number of distinct memoization keys,
+ * used for cache size evaluation. Kept for EXPLAIN */
} MemoizePath;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index f78bffd90cf..b38d4d91fb4 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1060,6 +1060,12 @@ typedef struct Memoize
/* paramids from param_exprs */
Bitmapset *keyparamids;
+
+ /*
+ * Estimated number of distinct memoization keys,
+ * used for cache size evaluation. Kept for EXPLAIN
+ */
+ double est_unique_keys;
} Memoize;
/* ----------------
--
2.34.1