Then we need to decide clearly what exactly to display in EXPLAIN for
the Memoize node: absolute values (estimated distinct keys and estimated
cache capacity) or ratios (hit_ratio and evict_ratio). Ratios have the
advantage of quickly reflecting the overall effectiveness of Memoize.
However, absolute values have a significant advantage as they explicitly
reveal the reason of Memoize's poor performance, making problem
diagnosis simpler.
With absolute values, users can directly understand the underlying
reason for poor performance. For example: insufficient memory (capacity
< distinct keys), inaccurate planner statistics (distinct keys
significantly different from actual values), poorly ordered keys
(capacity ~ distinct keys, but frequent evictions as seen in the
Evictions parameter), or Memoize simply not being beneficial (capacity ~
distinct keys ~ calls). Ratios, by contrast, only reflect the final
outcome without clearly indicating the cause or the specific steps
needed to resolve the issue.
Thus, absolute values do more than just inform users that a problem
exists; they provide actionable details that enable users to directly
address the problem (increase work_mem, refresh statistics, create
extended statistics, or disable Memoize entirely). Additionally, no
other plan nodes in PostgreSQL currently use a similar ratio-based
approach - everywhere else absolute values are consistently shown (e.g.,
number of rows, buckets, batches, memory used, etc.). Using absolute
values in Memoize maintains consistency with existing practice.
I've updated the patch to v5, since the new parameter est_unique_keys in
make_memoize() is now placed near est_entries, which is more logical and
readable than putting it at the end.
Any thoughts?
--
Best Regards,
Ilia Evdokimov,
Tantor Labs LLC.
From 79af33e499730a4bd0553832b23bf68c90817c6e Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <ilya.evdoki...@tantorlabs.com>
Date: Fri, 28 Mar 2025 15:17:09 +0300
Subject: [PATCH v5] Show ndistinct and est_entries in EXPLAIN for Memoize
---
src/backend/commands/explain.c | 16 ++++++++++++++++
src/backend/optimizer/path/costsize.c | 3 +++
src/backend/optimizer/plan/createplan.c | 10 +++++++---
src/backend/optimizer/util/pathnode.c | 3 +++
src/include/nodes/pathnodes.h | 2 ++
src/include/nodes/plannodes.h | 6 ++++++
6 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 391b34a2af2..2bacf7f85cb 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3628,6 +3628,22 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
ExplainPropertyText("Cache Key", keystr.data, es);
ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
+ if (es->costs)
+ {
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "Estimated Capacity: %u Estimated Distinct Lookup Keys: %0.0f\n",
+ ((Memoize *) plan)->est_entries,
+ ((Memoize *) plan)->est_unique_keys);
+ }
+ else
+ {
+ ExplainPropertyUInteger("Estimated Capacity", "", ((Memoize *) plan)->est_entries, es);
+ ExplainPropertyFloat("Estimated Distinct Lookup Keys", "", ((Memoize *) plan)->est_unique_keys, 0, es);
+ }
+ }
+
pfree(keystr.data);
if (!es->analyze)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index f6f77b8fe19..e3abf9ccc26 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2604,6 +2604,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
PG_UINT32_MAX);
+ /* Remember ndistinct for a potential EXPLAIN later */
+ mpath->est_unique_keys = ndistinct;
+
/*
* When the number of distinct parameter values is above the amount we can
* store in the cache, then we'll have to evict some entries from the
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 75e2b0b9036..38882501484 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -284,7 +284,8 @@ static Material *make_material(Plan *lefttree);
static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
Oid *collations, List *param_exprs,
bool singlerow, bool binary_mode,
- uint32 est_entries, Bitmapset *keyparamids);
+ uint32 est_entries, double est_unique_keys,
+ Bitmapset *keyparamids);
static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1703,7 +1704,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
plan = make_memoize(subplan, operators, collations, param_exprs,
best_path->singlerow, best_path->binary_mode,
- best_path->est_entries, keyparamids);
+ best_path->est_entries, best_path->est_unique_keys,
+ keyparamids);
copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -6636,7 +6638,8 @@ materialize_finished_plan(Plan *subplan)
static Memoize *
make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
List *param_exprs, bool singlerow, bool binary_mode,
- uint32 est_entries, Bitmapset *keyparamids)
+ uint32 est_entries, double est_unique_keys,
+ Bitmapset *keyparamids)
{
Memoize *node = makeNode(Memoize);
Plan *plan = &node->plan;
@@ -6654,6 +6657,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
node->binary_mode = binary_mode;
node->est_entries = est_entries;
node->keyparamids = keyparamids;
+ node->est_unique_keys = est_unique_keys;
return node;
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..1fbcda99067 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1701,6 +1701,9 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
Assert(enable_memoize);
pathnode->path.disabled_nodes = subpath->disabled_nodes;
+ /* Estimated number of distinct memoization keys, computed using estimate_num_groups() */
+ pathnode->est_unique_keys = 0;
+
/*
* Add a small additional charge for caching the first entry. All the
* harder calculations for rescans are performed in cost_memoize_rescan().
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c24a1fc8514..0946ccea994 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2040,6 +2040,8 @@ typedef struct MemoizePath
uint32 est_entries; /* The maximum number of entries that the
* planner expects will fit in the cache, or 0
* if unknown */
+ double est_unique_keys; /* Estimated number of distinct memoization keys,
+ * used for cache size evaluation. Kept for EXPLAIN */
} MemoizePath;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 658d76225e4..3d9d3a1159d 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1063,6 +1063,12 @@ typedef struct Memoize
/* paramids from param_exprs */
Bitmapset *keyparamids;
+
+ /*
+ * Estimated number of distinct memoization keys,
+ * used for cache size evaluation. Kept for EXPLAIN
+ */
+ double est_unique_keys;
} Memoize;
/* ----------------
--
2.34.1