On 27.04.2025 22:58, Ilia Evdokimov wrote:
Therefore, I think it is better to report percentages directly. Since non-text EXPLAIN formats do not display units, I propose to rename the field to "hit_percent" in all formats, including the text format. This way, the meaning of the value remains clear without needing additional context. What do you think about this approach?
I attached updated v9 patch with the suggested changes. The updated line in the EXPLAIN looks like this:
Text format: Estimates: capacity=1 distinct keys=1 lookups=2 hit percent=50.00% Non-text format: Estimated Capacity: 1 Estimated Distinct Lookup Keys: 1 Estimated Lookups: 2 Estimated Hit Percent: 50.00 Any suggestions? -- Best regards, Ilia Evdokimov, Tantor Labs LLC.
From eff0cb420c922343af9e09e58cec7b5be19c35e4 Mon Sep 17 00:00:00 2001 From: Evdokimov Ilia <ilya.evdoki...@tantorlabs.com> Date: Thu, 1 May 2025 15:20:07 +0300 Subject: [PATCH v9] Expose cache hit statistics in Memoize node EXPLAIN output. This patch adds additional information to the Memoize node's EXPLAIN output: estimated cache capacity, number of distinct keys, total lookups, and cache hit percentage. Reviewed-by: David Rowley <dgrowle...@gmail.com> Reviewed-by: Robert Haas <robertmh...@gmail.com> Reviewed-by: Andrei Lepikhov <lepi...@gmail.com> Reviewed-by: Tom Lane <t...@sss.pgh.pa.us> --- src/backend/commands/explain.c | 20 ++++++++++++++++++++ src/backend/optimizer/path/costsize.c | 6 ++++++ src/backend/optimizer/plan/createplan.c | 15 ++++++++++++--- src/backend/optimizer/util/pathnode.c | 12 ++++++++++++ src/include/nodes/pathnodes.h | 3 +++ src/include/nodes/plannodes.h | 12 ++++++++++++ 6 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 786ee865f14..3a5b436b561 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3628,6 +3628,26 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) ExplainPropertyText("Cache Key", keystr.data, es); ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es); + if (es->costs) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfo(es->str, "Estimates: capacity=%u distinct keys=%.0f lookups=%.0f hit percent=%.2f%%\n", + ((Memoize *) plan)->est_entries, + ((Memoize *) plan)->est_unique_keys, + ((Memoize *) plan)->est_calls, + ((Memoize *) plan)->est_hit_ratio * 100.0); + } + else + { + ExplainPropertyUInteger("Estimated Capacity", NULL, ((Memoize *) plan)->est_entries, es); + ExplainPropertyFloat("Estimated Distinct Lookup Keys", NULL, ((Memoize *) plan)->est_unique_keys, 0, es); + ExplainPropertyFloat("Estimated Lookups", NULL, ((Memoize *) plan)->est_calls, 0, es); + ExplainPropertyFloat("Estimated Hit Percent", NULL, ((Memoize *) plan)->est_hit_ratio * 100.0, 2, es); + } + } + pfree(keystr.data); if (!es->analyze) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 60b0fcfb6be..226a7861543 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2604,6 +2604,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, mpath->est_entries = Min(Min(ndistinct, est_cache_entries), PG_UINT32_MAX); + /* Remember ndistinct for a potential EXPLAIN later */ + mpath->est_unique_keys = ndistinct; + /* * When the number of distinct parameter values is above the amount we can * store in the cache, then we'll have to evict some entries from the @@ -2621,6 +2624,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, hit_ratio = ((calls - ndistinct) / calls) * (est_cache_entries / Max(ndistinct, est_cache_entries)); + /* Remember cache hit ratio for a potential EXPLAIN later */ + mpath->est_hit_ratio = hit_ratio; + Assert(hit_ratio >= 0 && hit_ratio <= 1.0); /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index a8f22a8c154..b3d3fac187f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -284,7 +284,9 @@ static Material *make_material(Plan *lefttree); static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, List *param_exprs, bool singlerow, bool binary_mode, - uint32 est_entries, Bitmapset *keyparamids); + uint32 est_entries, Bitmapset *keyparamids, + double est_unique_keys, double est_hit_ratio, + double est_calls); static WindowAgg *make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, @@ -1703,7 +1705,9 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags) plan = make_memoize(subplan, operators, collations, param_exprs, best_path->singlerow, best_path->binary_mode, - best_path->est_entries, keyparamids); + best_path->est_entries, keyparamids, + best_path->est_unique_keys, best_path->est_hit_ratio, + best_path->calls); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6636,7 +6640,9 @@ materialize_finished_plan(Plan *subplan) static Memoize * make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, List *param_exprs, bool singlerow, bool binary_mode, - uint32 est_entries, Bitmapset *keyparamids) + uint32 est_entries, Bitmapset *keyparamids, + double est_unique_keys, double est_hit_ratio, + double est_calls) { Memoize *node = makeNode(Memoize); Plan *plan = &node->plan; @@ -6654,6 +6660,9 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, node->binary_mode = binary_mode; node->est_entries = est_entries; node->keyparamids = keyparamids; + node->est_unique_keys = est_unique_keys; + node->est_hit_ratio = est_hit_ratio; + node->est_calls = est_calls; return node; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 93e73cb44db..7e35421f410 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1701,6 +1701,18 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, Assert(enable_memoize); pathnode->path.disabled_nodes = subpath->disabled_nodes; + /* + * Estimated number of distinct memoization keys, + * computed using estimate_num_groups() + */ + pathnode->est_unique_keys = 0; + + /* + * The estimated cache hit ratio will calculated later + * by cost_memoize_rescan(). + */ + pathnode->est_hit_ratio = 0; + /* * Add a small additional charge for caching the first entry. All the * harder calculations for rescans are performed in cost_memoize_rescan(). diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 011e5a811c3..938e4c43a81 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2138,6 +2138,9 @@ typedef struct MemoizePath uint32 est_entries; /* The maximum number of entries that the * planner expects will fit in the cache, or 0 * if unknown */ + double est_unique_keys; /* Estimated number of distinct memoization keys, + * used for cache size evaluation. Kept for EXPLAIN */ + double est_hit_ratio; /* Estimated cache hit ratio. Kept for EXPLAIN */ } MemoizePath; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 658d76225e4..26a4c78afe8 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1063,6 +1063,18 @@ typedef struct Memoize /* paramids from param_exprs */ Bitmapset *keyparamids; + + /* + * Estimated number of distinct memoization keys, + * used for cache size evaluation. Kept for EXPLAIN + */ + double est_unique_keys; + + /* Estimated cache hit ratio. Kept for EXPLAIN */ + double est_hit_ratio; + + /* Estimated number of rescans. Kept for EXPLAIN */ + double est_calls; } Memoize; /* ---------------- -- 2.34.1