On 14.04.2025 23:53, David Rowley wrote:
If we can't get consensus for everything people want to add at once
then maybe the patch could be broken into two, with 0001 being pretty
much the v4 patch and then have 0002 add the Estimated Hit Ratio.
Having the physical patches and being able to try it out or view the
regression test changes might lower the bar on people chipping in with
their views.


Agreed - I’ll split the patch into two: one adds Estimated Capacity and Estimated Distinct Keys, and the second adds Estimated Hit Ratio. I’ll also add regression test differences to make it easier to evaluate the usefulness of each part.

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.
From 99a23dfae4415a3eb88c3847deefae3c2103077d Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <ilya.evdoki...@tantorlabs.com>
Date: Tue, 15 Apr 2025 00:47:26 +0300
Subject: [PATCH v7 1/2] Show ndistinct and est_entries in EXPLAIN for Memoize

---
 src/backend/commands/explain.c               | 13 ++++
 src/backend/optimizer/path/costsize.c        |  3 +
 src/backend/optimizer/plan/createplan.c      | 10 ++-
 src/backend/optimizer/util/pathnode.c        |  3 +
 src/include/nodes/pathnodes.h                |  2 +
 src/include/nodes/plannodes.h                |  6 ++
 src/test/regress/expected/create_index.out   |  3 +-
 src/test/regress/expected/join.out           | 64 +++++++++++---------
 src/test/regress/expected/memoize.out        | 54 +++++++++++------
 src/test/regress/expected/partition_join.out | 18 ++++--
 src/test/regress/expected/subselect.out      | 10 +--
 11 files changed, 126 insertions(+), 60 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 786ee865f14..0a4ce5ee7d7 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3628,6 +3628,19 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
 	ExplainPropertyText("Cache Key", keystr.data, es);
 	ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
 
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainIndentText(es);
+		appendStringInfo(es->str, "Estimated Capacity: %u  Estimated Distinct Lookup Keys: %0.0f\n",
+						((Memoize *) plan)->est_entries,
+						((Memoize *) plan)->est_unique_keys);
+	}
+	else
+	{
+		ExplainPropertyUInteger("Estimated Capacity", "", ((Memoize *) plan)->est_entries, es);
+		ExplainPropertyFloat("Estimated Distinct Lookup Keys", "", ((Memoize *) plan)->est_unique_keys, 0, es);
+	}
+
 	pfree(keystr.data);
 
 	if (!es->analyze)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 60b0fcfb6be..f72319d903c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2604,6 +2604,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
 							 PG_UINT32_MAX);
 
+	/* Remember ndistinct for a potential EXPLAIN later */
+	mpath->est_unique_keys = ndistinct;
+
 	/*
 	 * When the number of distinct parameter values is above the amount we can
 	 * store in the cache, then we'll have to evict some entries from the
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index a8f22a8c154..a1456c9014d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -284,7 +284,8 @@ static Material *make_material(Plan *lefttree);
 static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
 							 Oid *collations, List *param_exprs,
 							 bool singlerow, bool binary_mode,
-							 uint32 est_entries, Bitmapset *keyparamids);
+							uint32 est_entries, Bitmapset *keyparamids,
+							double est_unique_keys);
 static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
 								 int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
 								 int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1703,7 +1704,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
 
 	plan = make_memoize(subplan, operators, collations, param_exprs,
 						best_path->singlerow, best_path->binary_mode,
-						best_path->est_entries, keyparamids);
+						best_path->est_entries, keyparamids,
+						best_path->est_unique_keys);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -6636,7 +6638,8 @@ materialize_finished_plan(Plan *subplan)
 static Memoize *
 make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 			 List *param_exprs, bool singlerow, bool binary_mode,
-			 uint32 est_entries, Bitmapset *keyparamids)
+			uint32 est_entries, Bitmapset *keyparamids,
+			double est_unique_keys)
 {
 	Memoize    *node = makeNode(Memoize);
 	Plan	   *plan = &node->plan;
@@ -6654,6 +6657,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 	node->binary_mode = binary_mode;
 	node->est_entries = est_entries;
 	node->keyparamids = keyparamids;
+	node->est_unique_keys = est_unique_keys;
 
 	return node;
 }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..1fbcda99067 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1701,6 +1701,9 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	Assert(enable_memoize);
 	pathnode->path.disabled_nodes = subpath->disabled_nodes;
 
+	/* Estimated number of distinct memoization keys, computed using estimate_num_groups() */
+	pathnode->est_unique_keys = 0;
+
 	/*
 	 * Add a small additional charge for caching the first entry.  All the
 	 * harder calculations for rescans are performed in cost_memoize_rescan().
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index bb678bdcdcd..07d97dc0b5b 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2138,6 +2138,8 @@ typedef struct MemoizePath
 	uint32		est_entries;	/* The maximum number of entries that the
 								 * planner expects will fit in the cache, or 0
 								 * if unknown */
+	double		est_unique_keys;	/* Estimated number of distinct memoization keys,
+								 * used for cache size evaluation. Kept for EXPLAIN */
 } MemoizePath;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 658d76225e4..3d9d3a1159d 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1063,6 +1063,12 @@ typedef struct Memoize
 
 	/* paramids from param_exprs */
 	Bitmapset  *keyparamids;
+
+	/*
+	 * Estimated number of distinct memoization keys,
+	 * used for cache size evaluation. Kept for EXPLAIN
+	 */
+	double		est_unique_keys;
 } Memoize;
 
 /* ----------------
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 9ade7b835e6..826018a8a9f 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2245,10 +2245,11 @@ SELECT count(*) FROM tenk1 LEFT JOIN tenk2 ON
          ->  Memoize
                Cache Key: tenk1.hundred
                Cache Mode: logical
+               Estimated Capacity: 100  Estimated Distinct Lookup Keys: 100
                ->  Index Scan using tenk2_hundred on tenk2
                      Index Cond: (hundred = tenk1.hundred)
                      Filter: ((thousand = 42) OR (thousand = 41) OR (tenthous = 2))
-(10 rows)
+(11 rows)
 
 --
 -- Check behavior with duplicate index column contents
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 14da5708451..eca6ce2c6f0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2729,8 +2729,8 @@ select * from onek t1
     left join lateral
       (select * from onek t3 where t3.two = t2.two offset 0) s
       on t2.unique1 = 1;
-                    QUERY PLAN                    
---------------------------------------------------
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
  Nested Loop Left Join
    ->  Seq Scan on onek t1
    ->  Materialize
@@ -2740,9 +2740,10 @@ select * from onek t1
                ->  Memoize
                      Cache Key: t2.two
                      Cache Mode: binary
+                     Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                      ->  Seq Scan on onek t3
                            Filter: (two = t2.two)
-(11 rows)
+(12 rows)
 
 --
 -- check a case where we formerly got confused by conflicting sort orders
@@ -5128,8 +5129,8 @@ select * from
   on i8.q2 = 123,
   lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss
 where t1.f1 = ss.f1;
-                    QUERY PLAN                    
---------------------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
  Nested Loop
    Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
    Join Filter: (t1.f1 = t2.f1)
@@ -5146,11 +5147,12 @@ where t1.f1 = ss.f1;
          Output: (i8.q1), t2.f1
          Cache Key: i8.q1
          Cache Mode: binary
+         Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
          ->  Limit
                Output: (i8.q1), t2.f1
                ->  Seq Scan on public.text_tbl t2
                      Output: i8.q1, t2.f1
-(20 rows)
+(21 rows)
 
 select * from
   text_tbl t1
@@ -5171,8 +5173,8 @@ select * from
   lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
   lateral (select ss1.* from text_tbl t3 limit 1) as ss2
 where t1.f1 = ss2.f1;
-                            QUERY PLAN                             
--------------------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Nested Loop
    Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1)
    Join Filter: (t1.f1 = (t2.f1))
@@ -5191,6 +5193,7 @@ where t1.f1 = ss2.f1;
                Output: (i8.q1), t2.f1
                Cache Key: i8.q1
                Cache Mode: binary
+               Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
                ->  Limit
                      Output: (i8.q1), t2.f1
                      ->  Seq Scan on public.text_tbl t2
@@ -5199,11 +5202,12 @@ where t1.f1 = ss2.f1;
          Output: ((i8.q1)), (t2.f1)
          Cache Key: (i8.q1), t2.f1
          Cache Mode: binary
+         Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
          ->  Limit
                Output: ((i8.q1)), (t2.f1)
                ->  Seq Scan on public.text_tbl t3
                      Output: (i8.q1), t2.f1
-(30 rows)
+(32 rows)
 
 select * from
   text_tbl t1
@@ -5225,8 +5229,8 @@ select 1 from
   left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
   lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
 where tt1.f1 = ss1.c0;
-                        QUERY PLAN                        
-----------------------------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
  Nested Loop
    Output: 1
    ->  Nested Loop Left Join
@@ -5252,6 +5256,7 @@ where tt1.f1 = ss1.c0;
          Output: ss1.c0
          Cache Key: tt4.f1
          Cache Mode: binary
+         Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
          ->  Subquery Scan on ss1
                Output: ss1.c0
                Filter: (ss1.c0 = 'foo'::text)
@@ -5259,7 +5264,7 @@ where tt1.f1 = ss1.c0;
                      Output: (tt4.f1)
                      ->  Seq Scan on public.text_tbl tt5
                            Output: tt4.f1
-(32 rows)
+(33 rows)
 
 select 1 from
   text_tbl as tt1
@@ -6462,17 +6467,18 @@ select * from sj t1
     join lateral
       (select * from sj tablesample system(t1.b)) s
     on t1.a = s.a;
-              QUERY PLAN               
----------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
  Nested Loop
    ->  Seq Scan on sj t1
    ->  Memoize
          Cache Key: t1.a, t1.b
          Cache Mode: binary
+         Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
          ->  Sample Scan on sj
                Sampling: system (t1.b)
                Filter: (t1.a = a)
-(8 rows)
+(9 rows)
 
 -- Ensure that SJE does not form a self-referential lateral dependency
 explain (costs off)
@@ -7614,43 +7620,46 @@ select count(*) from tenk1 a, lateral generate_series(1,two) g;
 
 explain (costs off)
   select count(*) from tenk1 a, lateral generate_series(1,two) g;
-                      QUERY PLAN                      
-------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Aggregate
    ->  Nested Loop
          ->  Seq Scan on tenk1 a
          ->  Memoize
                Cache Key: a.two
                Cache Mode: binary
+               Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                ->  Function Scan on generate_series g
-(7 rows)
+(8 rows)
 
 explain (costs off)
   select count(*) from tenk1 a cross join lateral generate_series(1,two) g;
-                      QUERY PLAN                      
-------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Aggregate
    ->  Nested Loop
          ->  Seq Scan on tenk1 a
          ->  Memoize
                Cache Key: a.two
                Cache Mode: binary
+               Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                ->  Function Scan on generate_series g
-(7 rows)
+(8 rows)
 
 -- don't need the explicit LATERAL keyword for functions
 explain (costs off)
   select count(*) from tenk1 a, generate_series(1,two) g;
-                      QUERY PLAN                      
-------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Aggregate
    ->  Nested Loop
          ->  Seq Scan on tenk1 a
          ->  Memoize
                Cache Key: a.two
                Cache Mode: binary
+               Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                ->  Function Scan on generate_series g
-(7 rows)
+(8 rows)
 
 -- lateral with UNION ALL subselect
 explain (costs off)
@@ -7702,8 +7711,8 @@ select count(*) from tenk1 a,
 explain (costs off)
   select count(*) from tenk1 a,
     tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
-                            QUERY PLAN                            
-------------------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Aggregate
    ->  Nested Loop
          ->  Nested Loop
@@ -7712,9 +7721,10 @@ explain (costs off)
          ->  Memoize
                Cache Key: "*VALUES*".column1
                Cache Mode: logical
+               Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                ->  Index Only Scan using tenk1_unique2 on tenk1 b
                      Index Cond: (unique2 = "*VALUES*".column1)
-(10 rows)
+(11 rows)
 
 select count(*) from tenk1 a,
   tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out
index 38dfaf021c9..39c76aaa1a4 100644
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -46,12 +46,13 @@ WHERE t2.unique1 < 1000;', false);
          ->  Memoize (actual rows=1.00 loops=N)
                Cache Key: t2.twenty
                Cache Mode: logical
+               Estimated Capacity: 20  Estimated Distinct Lookup Keys: 20
                Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1.00 loops=N)
                      Index Cond: (unique1 = t2.twenty)
                      Heap Fetches: N
                      Index Searches: N
-(13 rows)
+(14 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
@@ -78,12 +79,13 @@ WHERE t1.unique1 < 1000;', false);
          ->  Memoize (actual rows=1.00 loops=N)
                Cache Key: t1.twenty
                Cache Mode: binary
+               Estimated Capacity: 20  Estimated Distinct Lookup Keys: 20
                Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1.00 loops=N)
                      Index Cond: (unique1 = t1.twenty)
                      Heap Fetches: N
                      Index Searches: N
-(13 rows)
+(14 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
@@ -113,6 +115,7 @@ WHERE t1.unique1 < 10;', false);
          ->  Memoize (actual rows=2.00 loops=N)
                Cache Key: t1.two
                Cache Mode: binary
+               Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                Hits: 8  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Subquery Scan on t2 (actual rows=2.00 loops=N)
                      Filter: (t1.two = t2.two)
@@ -120,7 +123,7 @@ WHERE t1.unique1 < 10;', false);
                      ->  Index Scan using tenk1_unique1 on tenk1 t2_1 (actual rows=4.00 loops=N)
                            Index Cond: (unique1 < 4)
                            Index Searches: N
-(15 rows)
+(16 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
@@ -149,13 +152,14 @@ WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
          ->  Memoize (actual rows=1.00 loops=N)
                Cache Key: (t1.two + 1)
                Cache Mode: binary
+               Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                Hits: 998  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1.00 loops=N)
                      Filter: ((t1.two + 1) = unique1)
                      Rows Removed by Filter: 9999
                      Heap Fetches: N
                      Index Searches: N
-(14 rows)
+(15 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
@@ -182,11 +186,12 @@ WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
          ->  Memoize (actual rows=1.00 loops=N)
                Cache Key: t1.two, t1.twenty
                Cache Mode: binary
+               Estimated Capacity: 40  Estimated Distinct Lookup Keys: 40
                Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Seq Scan on tenk1 t2 (actual rows=1.00 loops=N)
                      Filter: ((t1.twenty = unique1) AND (t1.two = two))
                      Rows Removed by Filter: 9999
-(12 rows)
+(13 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
@@ -220,13 +225,14 @@ ON t1.x = t2.t::numeric AND t1.t::numeric = t2.x;', false);
    ->  Memoize (actual rows=2.00 loops=N)
          Cache Key: t1.x, (t1.t)::numeric
          Cache Mode: logical
+         Estimated Capacity: 20  Estimated Distinct Lookup Keys: 20
          Hits: 20  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Only Scan using expr_key_idx_x_t on expr_key t2 (actual rows=2.00 loops=N)
                Index Cond: (x = (t1.t)::numeric)
                Filter: (t1.x = (t)::numeric)
                Heap Fetches: N
                Index Searches: N
-(11 rows)
+(12 rows)
 
 DROP TABLE expr_key;
 -- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
@@ -249,12 +255,13 @@ WHERE t2.unique1 < 1200;', true);
          ->  Memoize (actual rows=1.00 loops=N)
                Cache Key: t2.thousand
                Cache Mode: logical
+               Estimated Capacity: 655  Estimated Distinct Lookup Keys: 721
                Hits: N  Misses: N  Evictions: N  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1.00 loops=N)
                      Index Cond: (unique1 = t2.thousand)
                      Heap Fetches: N
                      Index Searches: N
-(13 rows)
+(14 rows)
 
 CREATE TABLE flt (f float);
 CREATE INDEX flt_f_idx ON flt (f);
@@ -273,12 +280,13 @@ SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
    ->  Memoize (actual rows=2.00 loops=N)
          Cache Key: f1.f
          Cache Mode: logical
+         Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
          Hits: 1  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Only Scan using flt_f_idx on flt f2 (actual rows=2.00 loops=N)
                Index Cond: (f = f1.f)
                Heap Fetches: N
                Index Searches: N
-(12 rows)
+(13 rows)
 
 -- Ensure memoize operates in binary mode
 SELECT explain_memoize('
@@ -292,12 +300,13 @@ SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false);
    ->  Memoize (actual rows=2.00 loops=N)
          Cache Key: f1.f
          Cache Mode: binary
+         Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
          Hits: 0  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Only Scan using flt_f_idx on flt f2 (actual rows=2.00 loops=N)
                Index Cond: (f <= f1.f)
                Heap Fetches: N
                Index Searches: N
-(12 rows)
+(13 rows)
 
 DROP TABLE flt;
 -- Exercise Memoize in binary mode with a large fixed width type and a
@@ -320,11 +329,12 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false);
    ->  Memoize (actual rows=4.00 loops=N)
          Cache Key: s1.n
          Cache Mode: binary
+         Estimated Capacity: 3  Estimated Distinct Lookup Keys: 3
          Hits: 3  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Scan using strtest_n_idx on strtest s2 (actual rows=4.00 loops=N)
                Index Cond: (n <= s1.n)
                Index Searches: N
-(10 rows)
+(11 rows)
 
 -- Ensure we get 3 hits and 3 misses
 SELECT explain_memoize('
@@ -337,11 +347,12 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
    ->  Memoize (actual rows=4.00 loops=N)
          Cache Key: s1.t
          Cache Mode: binary
+         Estimated Capacity: 4  Estimated Distinct Lookup Keys: 4
          Hits: 3  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Scan using strtest_t_idx on strtest s2 (actual rows=4.00 loops=N)
                Index Cond: (t <= s1.t)
                Index Searches: N
-(10 rows)
+(11 rows)
 
 DROP TABLE strtest;
 -- Ensure memoize works with partitionwise join
@@ -366,6 +377,7 @@ SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false);
          ->  Memoize (actual rows=4.00 loops=N)
                Cache Key: t1_1.a
                Cache Mode: logical
+               Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                Hits: 3  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using iprt_p1_a on prt_p1 t2_1 (actual rows=4.00 loops=N)
                      Index Cond: (a = t1_1.a)
@@ -378,12 +390,13 @@ SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false);
          ->  Memoize (actual rows=4.00 loops=N)
                Cache Key: t1_2.a
                Cache Mode: logical
+               Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
                Hits: 3  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using iprt_p2_a on prt_p2 t2_2 (actual rows=4.00 loops=N)
                      Index Cond: (a = t1_2.a)
                      Heap Fetches: N
                      Index Searches: N
-(25 rows)
+(27 rows)
 
 -- Ensure memoize works with parameterized union-all Append path
 SET enable_partitionwise_join TO off;
@@ -400,6 +413,7 @@ ON t1.a = t2.a;', false);
    ->  Memoize (actual rows=4.00 loops=N)
          Cache Key: t1.a
          Cache Mode: logical
+         Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
          Hits: 3  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Append (actual rows=4.00 loops=N)
                ->  Index Only Scan using iprt_p1_a on prt_p1 (actual rows=4.00 loops=N)
@@ -410,7 +424,7 @@ ON t1.a = t2.a;', false);
                      Index Cond: (a = t1.a)
                      Heap Fetches: N
                      Index Searches: N
-(17 rows)
+(18 rows)
 
 DROP TABLE prt;
 RESET enable_partitionwise_join;
@@ -424,8 +438,8 @@ WHERE unique1 < 3
 	SELECT 1 FROM tenk1 t1
 	INNER JOIN tenk1 t2 ON t1.unique1 = t2.hundred
 	WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
-                           QUERY PLAN                           
-----------------------------------------------------------------
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
  Index Scan using tenk1_unique1 on tenk1 t0
    Index Cond: (unique1 < 3)
    Filter: EXISTS(SubPlan 1)
@@ -436,10 +450,11 @@ WHERE unique1 < 3
            ->  Memoize
                  Cache Key: t2.hundred
                  Cache Mode: logical
+                 Estimated Capacity: 100  Estimated Distinct Lookup Keys: 100
                  ->  Index Scan using tenk1_unique1 on tenk1 t1
                        Index Cond: (unique1 = t2.hundred)
                        Filter: (t0.ten = twenty)
-(13 rows)
+(14 rows)
 
 -- Ensure the above query returns the correct result
 SELECT unique1 FROM tenk1 t0
@@ -469,8 +484,8 @@ EXPLAIN (COSTS OFF)
 SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
 LATERAL (SELECT t2.unique1 FROM tenk1 t2 WHERE t1.twenty = t2.unique1) t2
 WHERE t1.unique1 < 1000;
-                                  QUERY PLAN                                   
--------------------------------------------------------------------------------
+                                      QUERY PLAN                                      
+--------------------------------------------------------------------------------------
  Finalize Aggregate
    ->  Gather
          Workers Planned: 2
@@ -483,9 +498,10 @@ WHERE t1.unique1 < 1000;
                      ->  Memoize
                            Cache Key: t1.twenty
                            Cache Mode: logical
+                           Estimated Capacity: 20  Estimated Distinct Lookup Keys: 20
                            ->  Index Only Scan using tenk1_unique1 on tenk1 t2
                                  Index Cond: (unique1 = t1.twenty)
-(14 rows)
+(15 rows)
 
 -- And ensure the parallel plan gives us the correct results.
 SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 6101c8c7cf1..d3c37ffe22b 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -5289,8 +5289,8 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1;
 
 -- Increase number of tuples requested and an IndexScan will be chosen
 EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
-                               QUERY PLAN                               
-------------------------------------------------------------------------
+                                   QUERY PLAN                                   
+--------------------------------------------------------------------------------
  Limit
    ->  Append
          ->  Nested Loop
@@ -5298,6 +5298,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                ->  Memoize
                      Cache Key: p1_1.c
                      Cache Mode: logical
+                     Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
                      ->  Index Scan using pht1_p1_c_idx on pht1_p1 p2_1
                            Index Cond: (c = p1_1.c)
          ->  Nested Loop
@@ -5305,6 +5306,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                ->  Memoize
                      Cache Key: p1_2.c
                      Cache Mode: logical
+                     Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
                      ->  Index Scan using pht1_p2_c_idx on pht1_p2 p2_2
                            Index Cond: (c = p1_2.c)
          ->  Nested Loop
@@ -5312,9 +5314,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                ->  Memoize
                      Cache Key: p1_3.c
                      Cache Mode: logical
+                     Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
                      ->  Index Scan using pht1_p3_c_idx on pht1_p3 p2_3
                            Index Cond: (c = p1_3.c)
-(23 rows)
+(26 rows)
 
 -- If almost all the data should be fetched - prefer SeqScan
 EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1000;
@@ -5343,8 +5346,8 @@ SET max_parallel_workers_per_gather = 1;
 SET debug_parallel_query = on;
 -- Partial paths should also be smart enough to employ limits
 EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
-                                  QUERY PLAN                                  
-------------------------------------------------------------------------------
+                                      QUERY PLAN                                      
+--------------------------------------------------------------------------------------
  Gather
    Workers Planned: 1
    Single Copy: true
@@ -5355,6 +5358,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                      ->  Memoize
                            Cache Key: p1_1.c
                            Cache Mode: logical
+                           Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
                            ->  Index Scan using pht1_p1_c_idx on pht1_p1 p2_1
                                  Index Cond: (c = p1_1.c)
                ->  Nested Loop
@@ -5362,6 +5366,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                      ->  Memoize
                            Cache Key: p1_2.c
                            Cache Mode: logical
+                           Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
                            ->  Index Scan using pht1_p2_c_idx on pht1_p2 p2_2
                                  Index Cond: (c = p1_2.c)
                ->  Nested Loop
@@ -5369,9 +5374,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                      ->  Memoize
                            Cache Key: p1_3.c
                            Cache Mode: logical
+                           Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
                            ->  Index Scan using pht1_p3_c_idx on pht1_p3 p2_3
                                  Index Cond: (c = p1_3.c)
-(26 rows)
+(29 rows)
 
 RESET debug_parallel_query;
 -- Remove indexes from the partitioned table and its partitions
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 40d8056fcea..f5575c7d332 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1316,8 +1316,8 @@ select sum(o.four), sum(ss.a) from
     select * from x
   ) ss
 where o.ten = 1;
-                       QUERY PLAN                        
----------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Aggregate
    ->  Nested Loop
          ->  Seq Scan on onek o
@@ -1325,13 +1325,14 @@ where o.ten = 1;
          ->  Memoize
                Cache Key: o.four
                Cache Mode: binary
+               Estimated Capacity: 4  Estimated Distinct Lookup Keys: 4
                ->  CTE Scan on x
                      CTE x
                        ->  Recursive Union
                              ->  Result
                              ->  WorkTable Scan on x x_1
                                    Filter: (a < 10)
-(13 rows)
+(14 rows)
 
 select sum(o.four), sum(ss.a) from
   onek o cross join lateral (
@@ -2642,6 +2643,7 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
                ->  Memoize
                      Cache Key: b.hundred, b.odd
                      Cache Mode: binary
+                     Estimated Capacity: 1000  Estimated Distinct Lookup Keys: 1000
                      ->  Subquery Scan on "ANY_subquery"
                            Filter: (b.hundred = "ANY_subquery".min)
                            ->  Result
@@ -2650,7 +2652,7 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
                                          ->  Index Scan using tenk2_hundred on tenk2 c
                                                Index Cond: (hundred IS NOT NULL)
                                                Filter: (odd = b.odd)
-(16 rows)
+(17 rows)
 
 --
 -- Test VALUES to ARRAY (VtA) transformation
-- 
2.34.1

From b2dc58e271075909d5136b09259f2f6b83d4d112 Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <ilya.evdoki...@tantorlabs.com>
Date: Tue, 15 Apr 2025 01:53:27 +0300
Subject: [PATCH v7 2/2] Add Estimated Hit Ratio for Memoize plan nodes in
 EXPLAIN

---
 src/backend/commands/explain.c               |  4 +-
 src/backend/optimizer/path/costsize.c        |  3 ++
 src/backend/optimizer/plan/createplan.c      |  7 +--
 src/backend/optimizer/util/pathnode.c        |  5 +++
 src/include/nodes/pathnodes.h                |  1 +
 src/include/nodes/plannodes.h                |  3 ++
 src/test/regress/expected/create_index.out   |  3 +-
 src/test/regress/expected/join.out           | 28 ++++++++----
 src/test/regress/expected/memoize.out        | 46 +++++++++++++-------
 src/test/regress/expected/partition_join.out | 10 ++++-
 src/test/regress/expected/subselect.out      |  6 ++-
 11 files changed, 83 insertions(+), 33 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 0a4ce5ee7d7..d8526b7d06d 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3634,11 +3634,14 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
 		appendStringInfo(es->str, "Estimated Capacity: %u  Estimated Distinct Lookup Keys: %0.0f\n",
 						((Memoize *) plan)->est_entries,
 						((Memoize *) plan)->est_unique_keys);
-	}
+		ExplainIndentText(es);
+		appendStringInfo(es->str, "Estimated Hit Ratio: %0.2f\n",
+						 ((Memoize *) plan)->hit_ratio);
+	}
 	else
 	{
 		ExplainPropertyUInteger("Estimated Capacity", "", ((Memoize *) plan)->est_entries, es);
 		ExplainPropertyFloat("Estimated Distinct Lookup Keys", "", ((Memoize *) plan)->est_unique_keys, 0, es);
+		ExplainPropertyFloat("Estimated Hit Ratio", "", ((Memoize *) plan)->hit_ratio, 2, es);
 	}
 
 	pfree(keystr.data);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index f72319d903c..3e99214501b 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2624,6 +2624,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	hit_ratio = ((calls - ndistinct) / calls) *
 		(est_cache_entries / Max(ndistinct, est_cache_entries));
 
+	/* Remember cache hit ratio for a potential EXPLAIN later */
+	mpath->hit_ratio = hit_ratio;
+
 	Assert(hit_ratio >= 0 && hit_ratio <= 1.0);
 
 	/*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index a1456c9014d..ccb880158fe 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -285,7 +285,7 @@ static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
 							 Oid *collations, List *param_exprs,
 							 bool singlerow, bool binary_mode,
 							uint32 est_entries, Bitmapset *keyparamids,
-							double est_unique_keys);
+							double est_unique_keys, double hit_ratio);
 static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
 								 int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
 								 int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1705,7 +1705,7 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
 	plan = make_memoize(subplan, operators, collations, param_exprs,
 						best_path->singlerow, best_path->binary_mode,
 						best_path->est_entries, keyparamids,
-						best_path->est_unique_keys);
+						best_path->est_unique_keys, best_path->hit_ratio);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -6639,7 +6639,7 @@ static Memoize *
 make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 			 List *param_exprs, bool singlerow, bool binary_mode,
 			uint32 est_entries, Bitmapset *keyparamids,
-			double est_unique_keys)
+			double est_unique_keys, double hit_ratio)
 {
 	Memoize    *node = makeNode(Memoize);
 	Plan	   *plan = &node->plan;
@@ -6658,6 +6658,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 	node->est_entries = est_entries;
 	node->keyparamids = keyparamids;
 	node->est_unique_keys = est_unique_keys;
+	node->hit_ratio = hit_ratio;
 
 	return node;
 }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 1fbcda99067..5971f67b77a 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1704,6 +1704,11 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	/* Estimated number of distinct memoization keys, computed using estimate_num_groups() */
 	pathnode->est_unique_keys = 0;
 
+	/*
+	 * The estimated cache hit ratio will be calculated later by cost_memoize_rescan()
+	 */
+	pathnode->hit_ratio = 0;
+
 	/*
 	 * Add a small additional charge for caching the first entry.  All the
 	 * harder calculations for rescans are performed in cost_memoize_rescan().
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 07d97dc0b5b..9bfc35cd4f6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2140,6 +2140,7 @@ typedef struct MemoizePath
 								 * if unknown */
 	double		est_unique_keys;	/* Estimated number of distinct memoization keys,
 								 * used for cache size evaluation. Kept for EXPLAIN */
+	double		hit_ratio;		/* Estimated cache hit ratio, kept for EXPLAIN */
 } MemoizePath;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 3d9d3a1159d..a7d350ce9ca 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1069,6 +1069,9 @@ typedef struct Memoize
 	 * used for cache size evaluation. Kept for EXPLAIN
 	 */
 	double		est_unique_keys;
+
+	/* Estimated cache hit ratio, kept for EXPLAIN */
+	double		hit_ratio;
 } Memoize;
 
 /* ----------------
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 826018a8a9f..0fa4359e23c 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2246,10 +2246,11 @@ SELECT count(*) FROM tenk1 LEFT JOIN tenk2 ON
                Cache Key: tenk1.hundred
                Cache Mode: logical
                Estimated Capacity: 100  Estimated Distinct Lookup Keys: 100
+               Estimated Hit Ratio: 0.99
                ->  Index Scan using tenk2_hundred on tenk2
                      Index Cond: (hundred = tenk1.hundred)
                      Filter: ((thousand = 42) OR (thousand = 41) OR (tenthous = 2))
-(11 rows)
+(12 rows)
 
 --
 -- Check behavior with duplicate index column contents
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index eca6ce2c6f0..212b7897919 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2741,9 +2741,10 @@ select * from onek t1
                      Cache Key: t2.two
                      Cache Mode: binary
                      Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+                     Estimated Hit Ratio: 1.00
                      ->  Seq Scan on onek t3
                            Filter: (two = t2.two)
-(12 rows)
+(13 rows)
 
 --
 -- check a case where we formerly got confused by conflicting sort orders
@@ -5148,11 +5149,12 @@ where t1.f1 = ss.f1;
          Cache Key: i8.q1
          Cache Mode: binary
          Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
+         Estimated Hit Ratio: 0.50
          ->  Limit
                Output: (i8.q1), t2.f1
                ->  Seq Scan on public.text_tbl t2
                      Output: i8.q1, t2.f1
-(21 rows)
+(22 rows)
 
 select * from
   text_tbl t1
@@ -5194,6 +5196,7 @@ where t1.f1 = ss2.f1;
                Cache Key: i8.q1
                Cache Mode: binary
                Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
+               Estimated Hit Ratio: 0.50
                ->  Limit
                      Output: (i8.q1), t2.f1
                      ->  Seq Scan on public.text_tbl t2
@@ -5203,11 +5206,12 @@ where t1.f1 = ss2.f1;
          Cache Key: (i8.q1), t2.f1
          Cache Mode: binary
          Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
+         Estimated Hit Ratio: 0.50
          ->  Limit
                Output: ((i8.q1)), (t2.f1)
                ->  Seq Scan on public.text_tbl t3
                      Output: (i8.q1), t2.f1
-(32 rows)
+(34 rows)
 
 select * from
   text_tbl t1
@@ -5257,6 +5261,7 @@ where tt1.f1 = ss1.c0;
          Cache Key: tt4.f1
          Cache Mode: binary
          Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
+         Estimated Hit Ratio: 0.50
          ->  Subquery Scan on ss1
                Output: ss1.c0
                Filter: (ss1.c0 = 'foo'::text)
@@ -5264,7 +5269,7 @@ where tt1.f1 = ss1.c0;
                      Output: (tt4.f1)
                      ->  Seq Scan on public.text_tbl tt5
                            Output: tt4.f1
-(33 rows)
+(34 rows)
 
 select 1 from
   text_tbl as tt1
@@ -6475,10 +6480,11 @@ select * from sj t1
          Cache Key: t1.a, t1.b
          Cache Mode: binary
          Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+         Estimated Hit Ratio: 0.33
          ->  Sample Scan on sj
                Sampling: system (t1.b)
                Filter: (t1.a = a)
-(9 rows)
+(10 rows)
 
 -- Ensure that SJE does not form a self-referential lateral dependency
 explain (costs off)
@@ -7629,8 +7635,9 @@ explain (costs off)
                Cache Key: a.two
                Cache Mode: binary
                Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+               Estimated Hit Ratio: 1.00
                ->  Function Scan on generate_series g
-(8 rows)
+(9 rows)
 
 explain (costs off)
   select count(*) from tenk1 a cross join lateral generate_series(1,two) g;
@@ -7643,8 +7650,9 @@ explain (costs off)
                Cache Key: a.two
                Cache Mode: binary
                Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+               Estimated Hit Ratio: 1.00
                ->  Function Scan on generate_series g
-(8 rows)
+(9 rows)
 
 -- don't need the explicit LATERAL keyword for functions
 explain (costs off)
@@ -7658,8 +7666,9 @@ explain (costs off)
                Cache Key: a.two
                Cache Mode: binary
                Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+               Estimated Hit Ratio: 1.00
                ->  Function Scan on generate_series g
-(8 rows)
+(9 rows)
 
 -- lateral with UNION ALL subselect
 explain (costs off)
@@ -7722,9 +7731,10 @@ explain (costs off)
                Cache Key: "*VALUES*".column1
                Cache Mode: logical
                Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+               Estimated Hit Ratio: 1.00
                ->  Index Only Scan using tenk1_unique2 on tenk1 b
                      Index Cond: (unique2 = "*VALUES*".column1)
-(11 rows)
+(12 rows)
 
 select count(*) from tenk1 a,
   tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out
index 39c76aaa1a4..1e1b3419991 100644
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -47,12 +47,13 @@ WHERE t2.unique1 < 1000;', false);
                Cache Key: t2.twenty
                Cache Mode: logical
                Estimated Capacity: 20  Estimated Distinct Lookup Keys: 20
+               Estimated Hit Ratio: 0.98
                Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1.00 loops=N)
                      Index Cond: (unique1 = t2.twenty)
                      Heap Fetches: N
                      Index Searches: N
-(14 rows)
+(15 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
@@ -80,12 +81,13 @@ WHERE t1.unique1 < 1000;', false);
                Cache Key: t1.twenty
                Cache Mode: binary
                Estimated Capacity: 20  Estimated Distinct Lookup Keys: 20
+               Estimated Hit Ratio: 0.98
                Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1.00 loops=N)
                      Index Cond: (unique1 = t1.twenty)
                      Heap Fetches: N
                      Index Searches: N
-(14 rows)
+(15 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
@@ -116,6 +118,7 @@ WHERE t1.unique1 < 10;', false);
                Cache Key: t1.two
                Cache Mode: binary
                Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+               Estimated Hit Ratio: 0.80
                Hits: 8  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Subquery Scan on t2 (actual rows=2.00 loops=N)
                      Filter: (t1.two = t2.two)
@@ -123,7 +126,7 @@ WHERE t1.unique1 < 10;', false);
                      ->  Index Scan using tenk1_unique1 on tenk1 t2_1 (actual rows=4.00 loops=N)
                            Index Cond: (unique1 < 4)
                            Index Searches: N
-(16 rows)
+(17 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
@@ -153,13 +156,14 @@ WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
                Cache Key: (t1.two + 1)
                Cache Mode: binary
                Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+               Estimated Hit Ratio: 1.00
                Hits: 998  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1.00 loops=N)
                      Filter: ((t1.two + 1) = unique1)
                      Rows Removed by Filter: 9999
                      Heap Fetches: N
                      Index Searches: N
-(15 rows)
+(16 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
@@ -187,11 +191,12 @@ WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
                Cache Key: t1.two, t1.twenty
                Cache Mode: binary
                Estimated Capacity: 40  Estimated Distinct Lookup Keys: 40
+               Estimated Hit Ratio: 0.96
                Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Seq Scan on tenk1 t2 (actual rows=1.00 loops=N)
                      Filter: ((t1.twenty = unique1) AND (t1.two = two))
                      Rows Removed by Filter: 9999
-(13 rows)
+(14 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
@@ -226,13 +231,14 @@ ON t1.x = t2.t::numeric AND t1.t::numeric = t2.x;', false);
          Cache Key: t1.x, (t1.t)::numeric
          Cache Mode: logical
          Estimated Capacity: 20  Estimated Distinct Lookup Keys: 20
+         Estimated Hit Ratio: 0.50
          Hits: 20  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Only Scan using expr_key_idx_x_t on expr_key t2 (actual rows=2.00 loops=N)
                Index Cond: (x = (t1.t)::numeric)
                Filter: (t1.x = (t)::numeric)
                Heap Fetches: N
                Index Searches: N
-(12 rows)
+(13 rows)
 
 DROP TABLE expr_key;
 -- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
@@ -256,12 +262,13 @@ WHERE t2.unique1 < 1200;', true);
                Cache Key: t2.thousand
                Cache Mode: logical
                Estimated Capacity: 655  Estimated Distinct Lookup Keys: 721
+               Estimated Hit Ratio: 0.36
                Hits: N  Misses: N  Evictions: N  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1.00 loops=N)
                      Index Cond: (unique1 = t2.thousand)
                      Heap Fetches: N
                      Index Searches: N
-(14 rows)
+(15 rows)
 
 CREATE TABLE flt (f float);
 CREATE INDEX flt_f_idx ON flt (f);
@@ -281,12 +288,13 @@ SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
          Cache Key: f1.f
          Cache Mode: logical
          Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
+         Estimated Hit Ratio: 0.50
          Hits: 1  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Only Scan using flt_f_idx on flt f2 (actual rows=2.00 loops=N)
                Index Cond: (f = f1.f)
                Heap Fetches: N
                Index Searches: N
-(13 rows)
+(14 rows)
 
 -- Ensure memoize operates in binary mode
 SELECT explain_memoize('
@@ -301,12 +309,13 @@ SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false);
          Cache Key: f1.f
          Cache Mode: binary
          Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
+         Estimated Hit Ratio: 0.50
          Hits: 0  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Only Scan using flt_f_idx on flt f2 (actual rows=2.00 loops=N)
                Index Cond: (f <= f1.f)
                Heap Fetches: N
                Index Searches: N
-(13 rows)
+(14 rows)
 
 DROP TABLE flt;
 -- Exercise Memoize in binary mode with a large fixed width type and a
@@ -330,11 +339,12 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false);
          Cache Key: s1.n
          Cache Mode: binary
          Estimated Capacity: 3  Estimated Distinct Lookup Keys: 3
+         Estimated Hit Ratio: 0.50
          Hits: 3  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Scan using strtest_n_idx on strtest s2 (actual rows=4.00 loops=N)
                Index Cond: (n <= s1.n)
                Index Searches: N
-(11 rows)
+(12 rows)
 
 -- Ensure we get 3 hits and 3 misses
 SELECT explain_memoize('
@@ -348,11 +358,12 @@ SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
          Cache Key: s1.t
          Cache Mode: binary
          Estimated Capacity: 4  Estimated Distinct Lookup Keys: 4
+         Estimated Hit Ratio: 0.33
          Hits: 3  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Index Scan using strtest_t_idx on strtest s2 (actual rows=4.00 loops=N)
                Index Cond: (t <= s1.t)
                Index Searches: N
-(11 rows)
+(12 rows)
 
 DROP TABLE strtest;
 -- Ensure memoize works with partitionwise join
@@ -378,6 +389,7 @@ SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false);
                Cache Key: t1_1.a
                Cache Mode: logical
                Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+               Estimated Hit Ratio: 0.50
                Hits: 3  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using iprt_p1_a on prt_p1 t2_1 (actual rows=4.00 loops=N)
                      Index Cond: (a = t1_1.a)
@@ -391,12 +403,13 @@ SELECT * FROM prt t1 INNER JOIN prt t2 ON t1.a = t2.a;', false);
                Cache Key: t1_2.a
                Cache Mode: logical
                Estimated Capacity: 2  Estimated Distinct Lookup Keys: 2
+               Estimated Hit Ratio: 0.50
                Hits: 3  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using iprt_p2_a on prt_p2 t2_2 (actual rows=4.00 loops=N)
                      Index Cond: (a = t1_2.a)
                      Heap Fetches: N
                      Index Searches: N
-(27 rows)
+(29 rows)
 
 -- Ensure memoize works with parameterized union-all Append path
 SET enable_partitionwise_join TO off;
@@ -414,6 +427,7 @@ ON t1.a = t2.a;', false);
          Cache Key: t1.a
          Cache Mode: logical
          Estimated Capacity: 1  Estimated Distinct Lookup Keys: 1
+         Estimated Hit Ratio: 0.75
          Hits: 3  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
          ->  Append (actual rows=4.00 loops=N)
                ->  Index Only Scan using iprt_p1_a on prt_p1 (actual rows=4.00 loops=N)
@@ -424,7 +438,7 @@ ON t1.a = t2.a;', false);
                      Index Cond: (a = t1.a)
                      Heap Fetches: N
                      Index Searches: N
-(18 rows)
+(19 rows)
 
 DROP TABLE prt;
 RESET enable_partitionwise_join;
@@ -451,10 +465,11 @@ WHERE unique1 < 3
                  Cache Key: t2.hundred
                  Cache Mode: logical
                  Estimated Capacity: 100  Estimated Distinct Lookup Keys: 100
+                 Estimated Hit Ratio: 0.99
                  ->  Index Scan using tenk1_unique1 on tenk1 t1
                        Index Cond: (unique1 = t2.hundred)
                        Filter: (t0.ten = twenty)
-(14 rows)
+(15 rows)
 
 -- Ensure the above query returns the correct result
 SELECT unique1 FROM tenk1 t0
@@ -499,9 +514,10 @@ WHERE t1.unique1 < 1000;
                            Cache Key: t1.twenty
                            Cache Mode: logical
                            Estimated Capacity: 20  Estimated Distinct Lookup Keys: 20
+                           Estimated Hit Ratio: 0.95
                            ->  Index Only Scan using tenk1_unique1 on tenk1 t2
                                  Index Cond: (unique1 = t1.twenty)
-(15 rows)
+(16 rows)
 
 -- And ensure the parallel plan gives us the correct results.
 SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index d3c37ffe22b..15a6d94d7c0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -5299,6 +5299,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                      Cache Key: p1_1.c
                      Cache Mode: logical
                      Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
+                     Estimated Hit Ratio: 0.76
                      ->  Index Scan using pht1_p1_c_idx on pht1_p1 p2_1
                            Index Cond: (c = p1_1.c)
          ->  Nested Loop
@@ -5307,6 +5308,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                      Cache Key: p1_2.c
                      Cache Mode: logical
                      Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
+                     Estimated Hit Ratio: 0.90
                      ->  Index Scan using pht1_p2_c_idx on pht1_p2 p2_2
                            Index Cond: (c = p1_2.c)
          ->  Nested Loop
@@ -5315,9 +5317,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                      Cache Key: p1_3.c
                      Cache Mode: logical
                      Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
+                     Estimated Hit Ratio: 0.90
                      ->  Index Scan using pht1_p3_c_idx on pht1_p3 p2_3
                            Index Cond: (c = p1_3.c)
-(26 rows)
+(29 rows)
 
 -- If almost all the data should be fetched - prefer SeqScan
 EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1000;
@@ -5359,6 +5362,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                            Cache Key: p1_1.c
                            Cache Mode: logical
                            Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
+                           Estimated Hit Ratio: 0.76
                            ->  Index Scan using pht1_p1_c_idx on pht1_p1 p2_1
                                  Index Cond: (c = p1_1.c)
                ->  Nested Loop
@@ -5367,6 +5371,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                            Cache Key: p1_2.c
                            Cache Mode: logical
                            Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
+                           Estimated Hit Ratio: 0.90
                            ->  Index Scan using pht1_p2_c_idx on pht1_p2 p2_2
                                  Index Cond: (c = p1_2.c)
                ->  Nested Loop
@@ -5375,9 +5380,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
                            Cache Key: p1_3.c
                            Cache Mode: logical
                            Estimated Capacity: 12  Estimated Distinct Lookup Keys: 12
+                           Estimated Hit Ratio: 0.90
                            ->  Index Scan using pht1_p3_c_idx on pht1_p3 p2_3
                                  Index Cond: (c = p1_3.c)
-(29 rows)
+(32 rows)
 
 RESET debug_parallel_query;
 -- Remove indexes from the partitioned table and its partitions
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index f5575c7d332..81e3e07d968 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1326,13 +1326,14 @@ where o.ten = 1;
                Cache Key: o.four
                Cache Mode: binary
                Estimated Capacity: 4  Estimated Distinct Lookup Keys: 4
+               Estimated Hit Ratio: 0.96
                ->  CTE Scan on x
                      CTE x
                        ->  Recursive Union
                              ->  Result
                              ->  WorkTable Scan on x x_1
                                    Filter: (a < 10)
-(14 rows)
+(15 rows)
 
 select sum(o.four), sum(ss.a) from
   onek o cross join lateral (
@@ -2644,6 +2645,7 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
                      Cache Key: b.hundred, b.odd
                      Cache Mode: binary
                      Estimated Capacity: 1000  Estimated Distinct Lookup Keys: 1000
+                     Estimated Hit Ratio: 0.90
                      ->  Subquery Scan on "ANY_subquery"
                            Filter: (b.hundred = "ANY_subquery".min)
                            ->  Result
@@ -2652,7 +2654,7 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
                                          ->  Index Scan using tenk2_hundred on tenk2 c
                                                Index Cond: (hundred IS NOT NULL)
                                                Filter: (odd = b.odd)
-(17 rows)
+(18 rows)
 
 --
 -- Test VALUES to ARRAY (VtA) transformation
-- 
2.34.1

Reply via email to