Hi,
In case of NestLoop with parameterised inner semi-join for each outer
tuple requires only a single tuple from its inner relation to produce a
result. It seems that the same principle applies to an anti-join. This
approach could effectively allow the Memoize node to enhance the
performance of pulled-up EXISTS and NOT EXISTS sublinks.
In attachment see a sketch of the feature. Since we are using single_row
mode, adapting this method to cache semi-join inner results should not
be extremely complex. However, I am unsure about potential corner cases
and would appreciate any feedback or criticisms regarding this approach.
--
regards, Andrei Lepikhov
From e47abde1f6a2c8c8d7fa588df72e59c0cd049d4c Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <lepi...@gmail.com>
Date: Wed, 25 Dec 2024 14:33:12 +0700
Subject: [PATCH v0] Memoise the inner of SEMI- and ANTI-join.
To get the result of semi-join or anti-join, we only need one tuple from
the parameterised inner. The Postgres core already includes Memoize's
single_row mode for the case when it is proved that inner returns only a single
value. Thus, implementing similar logic for semi-joins and anti-joins is
quite feasible.
It is clear that the semi-join operates correctly: it returns the outer tuple
as soon as the first tuple is received from the inner query. In contrast, the
behaviour of the anti-join is less straightforward. If anti-join had a join
clause not pushed to the parameterised inner, it would be possible to read more
than a single tuple from the inner, which causes inconsistency. However,
it seems that this is an impossible case for the current planner.
---
src/backend/commands/explain.c | 9 +++++++++
src/backend/optimizer/path/costsize.c | 2 +-
src/backend/optimizer/path/joinpath.c | 5 +++--
src/backend/optimizer/util/pathnode.c | 2 +-
src/test/regress/expected/join.out | 3 ++-
src/test/regress/expected/subselect.out | 3 ++-
6 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index d8a7232cedb..c90067dadcb 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3632,6 +3632,10 @@ show_memoize_info(MemoizeState *mstate, List *ancestors,
ExplainState *es)
{
ExplainPropertyText("Cache Key", keystr.data, es);
ExplainPropertyText("Cache Mode", mstate->binary_mode ?
"binary" : "logical", es);
+
+ /* Report only in the single mode case to not break current
tests */
+ if (mstate->singlerow)
+ ExplainPropertyText("Store Mode", "singlerow", es);
}
else
{
@@ -3639,6 +3643,11 @@ show_memoize_info(MemoizeState *mstate, List *ancestors,
ExplainState *es)
appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
ExplainIndentText(es);
appendStringInfo(es->str, "Cache Mode: %s\n",
mstate->binary_mode ? "binary" : "logical");
+ if (mstate->singlerow)
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "Store Mode: %s\n",
"singlerow");
+ }
}
pfree(keystr.data);
diff --git a/src/backend/optimizer/path/costsize.c
b/src/backend/optimizer/path/costsize.c
index 73d78617009..4cca5a7395c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2545,7 +2545,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
ListCell *lc;
Cost input_startup_cost = mpath->subpath->startup_cost;
Cost input_total_cost = mpath->subpath->total_cost;
- double tuples = mpath->subpath->rows;
+ double tuples = mpath->singlerow ? 1 : mpath->subpath->rows;
double calls = mpath->calls;
int width = mpath->subpath->pathtarget->width;
diff --git a/src/backend/optimizer/path/joinpath.c
b/src/backend/optimizer/path/joinpath.c
index 18891ce9156..89738b483ef 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -682,6 +682,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
ListCell *lc;
bool binary_mode;
List *ph_lateral_vars;
+ bool single_mode = false;
/* Obviously not if it's disabled */
if (!enable_memoize)
@@ -725,7 +726,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
*/
if (!extra->inner_unique && (jointype == JOIN_SEMI ||
jointype ==
JOIN_ANTI))
- return NULL;
+ single_mode = true;
/*
* Memoize normally marks cache entries as complete when it runs out of
@@ -808,7 +809,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
inner_path,
param_exprs,
hash_operators,
-
extra->inner_unique,
+
extra->inner_unique || single_mode,
binary_mode,
outer_path->rows);
}
diff --git a/src/backend/optimizer/util/pathnode.c
b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..9b4c0f96193 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1707,7 +1707,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath,
*/
pathnode->path.startup_cost = subpath->startup_cost + cpu_tuple_cost;
pathnode->path.total_cost = subpath->total_cost + cpu_tuple_cost;
- pathnode->path.rows = subpath->rows;
+ pathnode->path.rows = (pathnode->singlerow) ? 1 : subpath->rows;
return pathnode;
}
diff --git a/src/test/regress/expected/join.out
b/src/test/regress/expected/join.out
index a57bb18c24f..e8293c3c87d 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6469,10 +6469,11 @@ select * from sj t1
-> Memoize
Cache Key: t1.a, t1.b
Cache Mode: binary
+ Store Mode: singlerow
-> Sample Scan on sj
Sampling: system (t1.b)
Filter: (t1.a = a)
-(8 rows)
+(9 rows)
-- Ensure that SJE does not form a self-referential lateral dependency
explain (costs off)
diff --git a/src/test/regress/expected/subselect.out
b/src/test/regress/expected/subselect.out
index d0db8a412ff..5ab56f8d71f 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -2642,6 +2642,7 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE
c.odd = b.odd);
-> Memoize
Cache Key: b.hundred, b.odd
Cache Mode: binary
+ Store Mode: singlerow
-> Subquery Scan on "ANY_subquery"
Filter: (b.hundred = "ANY_subquery".min)
-> Result
@@ -2650,5 +2651,5 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE
c.odd = b.odd);
-> Index Scan using tenk2_hundred on
tenk2 c
Index Cond: (hundred IS NOT
NULL)
Filter: (odd = b.odd)
-(16 rows)
+(17 rows)
--
2.48.1