On 4/10/2023 14:34, Alexander Korotkov wrote:
> Relid replacement machinery is the most contradictory code here. We used
> a utilitarian approach and implemented a simplistic variant.
> > 2) It would be nice to skip the insertion of IS NOT NULL checks when
> > they are not necessary. [1] points that infrastructure from [2] might
> > be useful. The patchset from [2] seems committed mow. However, I
> > can't see it is directly helpful in this matter. Could we just skip
> > adding IS NOT NULL clause for the columns, that have
> > pg_attribute.attnotnull set?
> Thanks for the links, I will look into that case.
To be more precise, in the attachment, you can find a diff to the main
patch, which shows the volume of changes to achieve the desired behaviour.
Some explains in regression tests shifted. So, I've made additional tests:
DROP TABLE test CASCADE;
CREATE TABLE test (a int, b int not null);
CREATE UNIQUE INDEX abc ON test(b);
explain SELECT * FROM test t1 JOIN test t2 ON (t1.a=t2.a)
WHERE t1.b=t2.b;
CREATE UNIQUE INDEX abc1 ON test(a,b);
explain SELECT * FROM test t1 JOIN test t2 ON (t1.a=t2.a)
WHERE t1.b=t2.b;
explain SELECT * FROM test t1 JOIN test t2 ON (t1.a=t2.a)
WHERE t1.b=t2.b AND (t1.a=t2.a OR t2.a=t1.a);
DROP INDEX abc1;
explain SELECT * FROM test t1 JOIN test t2 ON (t1.a=t2.a)
WHERE t1.b=t2.b AND (t1.b=t2.b OR t2.b=t1.b);
We have almost the results we wanted to have. But in the last explain
you can see that nothing happened with the OR clause. We should use the
expression mutator instead of walker to handle such clauses. But It
doesn't process the RestrictInfo node ... I'm inclined to put a solution
of this issue off for a while.
--
regards,
Andrey Lepikhov
Postgres Professional
diff --git a/src/backend/optimizer/plan/analyzejoins.c
b/src/backend/optimizer/plan/analyzejoins.c
index a127239d30..c12aa15fc9 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -73,7 +73,7 @@ static bool is_innerrel_unique_for(PlannerInfo *root,
List
*restrictlist,
List
**extra_clauses);
static Bitmapset *replace_relid(Relids relids, int oldId, int newId);
-static void replace_varno(Node *node, int from, int to);
+static void replace_varno(PlannerInfo *root, Node *node, int from, int to);
/*
@@ -388,7 +388,7 @@ remove_rel_from_query_common(PlannerInfo *root, RelOptInfo
*rel,
}
/* Update lateral references. */
- replace_varno((Node *) otherrel->lateral_vars, relid, subst);
+ replace_varno(root, (Node *) otherrel->lateral_vars, relid,
subst);
}
/*
@@ -425,7 +425,7 @@ remove_rel_from_query_common(PlannerInfo *root, RelOptInfo
*rel,
sjinf->commute_below_l = replace_relid(sjinf->commute_below_l,
ojrelid, subst);
sjinf->commute_below_r = replace_relid(sjinf->commute_below_r,
ojrelid, subst);
- replace_varno((Node *) sjinf->semi_rhs_exprs, relid, subst);
+ replace_varno(root, (Node *) sjinf->semi_rhs_exprs, relid,
subst);
}
/*
@@ -1399,6 +1399,7 @@ is_innerrel_unique_for(PlannerInfo *root,
typedef struct ReplaceVarnoContext
{
+ PlannerInfo *root;
int from;
int to;
} ReplaceVarnoContext;
@@ -1420,6 +1421,11 @@ replace_varno_walker(Node *node, ReplaceVarnoContext
*ctx)
}
return false;
}
+
+ /*
+ * Expression walker doesn't know about RestrictInfo node. Do recursive
pass
+ * into the clauses manually.
+ */
if (IsA(node, RestrictInfo))
{
RestrictInfo *rinfo = (RestrictInfo *) node;
@@ -1429,20 +1435,26 @@ replace_varno_walker(Node *node, ReplaceVarnoContext
*ctx)
if (bms_is_member(ctx->from, rinfo->clause_relids))
{
- replace_varno((Node *) rinfo->clause, ctx->from,
ctx->to);
- replace_varno((Node *) rinfo->orclause, ctx->from,
ctx->to);
- rinfo->clause_relids =
replace_relid(rinfo->clause_relids, ctx->from, ctx->to);
- rinfo->left_relids = replace_relid(rinfo->left_relids,
ctx->from, ctx->to);
- rinfo->right_relids =
replace_relid(rinfo->right_relids, ctx->from, ctx->to);
+ replace_varno(ctx->root, (Node *) rinfo->clause,
ctx->from, ctx->to);
+ replace_varno(ctx->root, (Node *) rinfo->orclause,
ctx->from, ctx->to);
+ rinfo->clause_relids =
+
replace_relid(rinfo->clause_relids, ctx->from, ctx->to);
+ rinfo->left_relids =
+
replace_relid(rinfo->left_relids, ctx->from, ctx->to);
+ rinfo->right_relids =
+
replace_relid(rinfo->right_relids, ctx->from, ctx->to);
}
if (is_req_equal)
rinfo->required_relids = rinfo->clause_relids;
else
- rinfo->required_relids =
replace_relid(rinfo->required_relids, ctx->from, ctx->to);
+ rinfo->required_relids =
+ replace_relid(rinfo->required_relids,
ctx->from, ctx->to);
- rinfo->outer_relids = replace_relid(rinfo->outer_relids,
ctx->from, ctx->to);
- rinfo->incompatible_relids =
replace_relid(rinfo->incompatible_relids, ctx->from, ctx->to);
+ rinfo->outer_relids =
+
replace_relid(rinfo->outer_relids, ctx->from, ctx->to);
+ rinfo->incompatible_relids =
+ replace_relid(rinfo->incompatible_relids,
ctx->from, ctx->to);
if (rinfo->mergeopfamilies &&
bms_get_singleton_member(rinfo->clause_relids, &relid)
&&
@@ -1456,13 +1468,31 @@ replace_varno_walker(Node *node, ReplaceVarnoContext
*ctx)
if (leftOp != NULL && equal(leftOp, rightOp))
{
- NullTest *ntest = makeNode(NullTest);
+ Oid reloid = 0;
+ AttrNumber attnum;
+
+ if (IsA(leftOp, Var))
+ {
+ Var *var = (Var *) leftOp;
+ reloid =
ctx->root->simple_rte_array[var->varno]->relid;
+ attnum = var->varattno;
+ }
+
+ if (OidIsValid(reloid) &&
get_attnotnull(reloid, attnum))
+ {
+ rinfo->clause = (Expr *)
makeBoolConst(true, false);
+ }
+ else
+ {
+ NullTest *ntest = makeNode(NullTest);
+
+ ntest->arg = leftOp;
+ ntest->nulltesttype = IS_NOT_NULL;
+ ntest->argisrow = false;
+ ntest->location = -1;
+ rinfo->clause = (Expr *) ntest;
+ }
- ntest->arg = leftOp;
- ntest->nulltesttype = IS_NOT_NULL;
- ntest->argisrow = false;
- ntest->location = -1;
- rinfo->clause = (Expr *) ntest;
rinfo->mergeopfamilies = NIL;
}
Assert(rinfo->orclause == NULL);
@@ -1477,13 +1507,14 @@ replace_varno_walker(Node *node, ReplaceVarnoContext
*ctx)
* Replace each occurence of removing relid with the keeping one
*/
static void
-replace_varno(Node *node, int from , int to)
+replace_varno(PlannerInfo *root, Node *node, int from , int to)
{
ReplaceVarnoContext ctx;
if (to <= 0)
return;
+ ctx.root = root;
ctx.from = from;
ctx.to = to;
replace_varno_walker(node, &ctx);
@@ -1531,7 +1562,7 @@ replace_relid(Relids relids, int oldId, int newId)
* delete them.
*/
static void
-update_eclass(EquivalenceClass *ec, int from, int to)
+update_eclass(PlannerInfo *root, EquivalenceClass *ec, int from, int to)
{
List *new_members = NIL;
List *new_sources = NIL;
@@ -1553,7 +1584,7 @@ update_eclass(EquivalenceClass *ec, int from, int to)
em->em_jdomain->jd_relids =
replace_relid(em->em_jdomain->jd_relids, from, to);
/* We only process inner joins */
- replace_varno((Node *) em->em_expr, from, to);
+ replace_varno(root, (Node *) em->em_expr, from, to);
foreach(lc1, new_members)
{
@@ -1591,7 +1622,7 @@ update_eclass(EquivalenceClass *ec, int from, int to)
continue;
}
- replace_varno((Node *) rinfo, from, to);
+ replace_varno(root, (Node *) rinfo, from, to);
/*
* After switching the clause to the remaining relation, check
it for
@@ -1685,7 +1716,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark
*kmark, PlanRowMark *rmark,
RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
remove_join_clause_from_rels(root, rinfo,
rinfo->required_relids);
- replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
+ replace_varno(root, (Node *) rinfo, toRemove->relid,
toKeep->relid);
if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
jinfo_candidates = lappend(jinfo_candidates, rinfo);
@@ -1704,7 +1735,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark
*kmark, PlanRowMark *rmark,
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
- replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
+ replace_varno(root, (Node *) rinfo, toRemove->relid,
toKeep->relid);
if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
jinfo_candidates = lappend(jinfo_candidates, rinfo);
@@ -1792,7 +1823,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark
*kmark, PlanRowMark *rmark,
{
EquivalenceClass *ec = (EquivalenceClass *)
list_nth(root->eq_classes, i);
- update_eclass(ec, toRemove->relid, toKeep->relid);
+ update_eclass(root, ec, toRemove->relid, toKeep->relid);
toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes,
i);
}
@@ -1804,7 +1835,7 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark
*kmark, PlanRowMark *rmark,
{
Node *node = lfirst(lc);
- replace_varno(node, toRemove->relid, toKeep->relid);
+ replace_varno(root, node, toRemove->relid, toKeep->relid);
if (!list_member(toKeep->reltarget->exprs, node))
toKeep->reltarget->exprs =
lappend(toKeep->reltarget->exprs, node);
}
@@ -1851,10 +1882,10 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark
*kmark, PlanRowMark *rmark,
/* At last, replace links in the planner info */
remove_rel_from_query_common(root, toRemove, toKeep->relid, NULL, NULL);
/* replace varno in root targetlist and HAVING clause */
- replace_varno((Node *) root->processed_tlist,
- toRemove->relid, toKeep->relid);
- replace_varno((Node *) root->processed_groupClause,
- toRemove->relid, toKeep->relid);
+ replace_varno(root, (Node *) root->processed_tlist,
+ toRemove->relid, toKeep->relid);
+ replace_varno(root, (Node *) root->processed_groupClause,
+ toRemove->relid, toKeep->relid);
/* ... and remove the rel from the baserel array */
root->simple_rel_array[toRemove->relid] = NULL;
pfree(toRemove);
@@ -1914,7 +1945,8 @@ split_selfjoin_quals(PlannerInfo *root, List *joinquals,
List **selfjoinquals,
* Quite expensive operation, narrowing the use case. For
example, when
* we have cast of the same var to different (but compatible)
types.
*/
- replace_varno(rightexpr,
bms_singleton_member(rinfo->right_relids),
+ replace_varno(root, rightexpr,
+
bms_singleton_member(rinfo->right_relids),
bms_singleton_member(rinfo->left_relids));
if (equal(leftexpr, rightexpr))
@@ -1954,7 +1986,7 @@ match_unique_clauses(PlannerInfo *root, RelOptInfo
*outer, List *uclauses,
bms_is_empty(rinfo->right_relids));
clause = (Expr *) copyObject(rinfo->clause);
- replace_varno((Node *) clause, relid, outer->relid);
+ replace_varno(root, (Node *) clause, relid, outer->relid);
iclause = bms_is_empty(rinfo->left_relids) ?
get_rightop(clause) :
get_leftop(clause);
diff --git a/src/backend/utils/cache/lsyscache.c
b/src/backend/utils/cache/lsyscache.c
index fc6d267e44..7778ed483f 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -929,6 +929,25 @@ get_attgenerated(Oid relid, AttrNumber attnum)
return result;
}
+char
+get_attnotnull(Oid relid, AttrNumber attnum)
+{
+ HeapTuple tp;
+ Form_pg_attribute att_tup;
+ char result;
+
+ tp = SearchSysCache2(ATTNUM,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(attnum));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for attribute %d of relation
%u",
+ attnum, relid);
+ att_tup = (Form_pg_attribute) GETSTRUCT(tp);
+ result = att_tup->attnotnull;
+ ReleaseSysCache(tp);
+ return result;
+}
+
/*
* get_atttype
*
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index f5fdbfe116..02521299a1 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -92,6 +92,7 @@ extern char *get_attname(Oid relid, AttrNumber attnum, bool
missing_ok);
extern AttrNumber get_attnum(Oid relid, const char *attname);
extern int get_attstattarget(Oid relid, AttrNumber attnum);
extern char get_attgenerated(Oid relid, AttrNumber attnum);
+extern char get_attnotnull(Oid relid, AttrNumber attnum);
extern Oid get_atttype(Oid relid, AttrNumber attnum);
extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum,
Oid *typid,
int32 *typmod, Oid *collid);
diff --git a/src/test/regress/expected/equivclass.out
b/src/test/regress/expected/equivclass.out
index de71441052..3d5de28354 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -438,15 +438,14 @@ set enable_mergejoin to off;
explain (costs off)
select * from ec0 m join ec0 n on m.ff = n.ff
join ec1 p on m.ff + n.ff = p.f1;
- QUERY PLAN
-----------------------------------------
+ QUERY PLAN
+---------------------------------------
Nested Loop
Join Filter: ((n.ff + n.ff) = p.f1)
- -> Seq Scan on ec1 p
+ -> Seq Scan on ec0 n
-> Materialize
- -> Seq Scan on ec0 n
- Filter: (ff IS NOT NULL)
-(6 rows)
+ -> Seq Scan on ec1 p
+(5 rows)
explain (costs off)
select * from ec0 m join ec0 n on m.ff = n.ff
@@ -455,11 +454,10 @@ explain (costs off)
---------------------------------------------------------------
Nested Loop
Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1)
- -> Seq Scan on ec1 p
+ -> Seq Scan on ec0 n
-> Materialize
- -> Seq Scan on ec0 n
- Filter: (ff IS NOT NULL)
-(6 rows)
+ -> Seq Scan on ec1 p
+(5 rows)
reset enable_mergejoin;
-- this could be converted, but isn't at present
diff --git a/src/test/regress/expected/join.out
b/src/test/regress/expected/join.out
index 027c356bcc..10b23944fe 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6337,14 +6337,14 @@ SELECT * FROM pg_am am WHERE am.amname IN (
JOIN pg_class c2
ON c1.oid=c2.oid AND c1.oid < 10
);
- QUERY PLAN
----------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------
Nested Loop Semi Join
Join Filter: (am.amname = c2.relname)
-> Seq Scan on pg_am am
-> Materialize
-> Index Scan using pg_class_oid_index on pg_class c2
- Index Cond: ((oid < '10'::oid) AND (oid IS NOT NULL))
+ Index Cond: (oid < '10'::oid)
(6 rows)
--
@@ -6599,14 +6599,14 @@ SELECT COUNT(*) FROM tab_with_flag
WHERE
(is_flag IS NULL OR is_flag = 0)
AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
- QUERY PLAN
-----------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on tab_with_flag
- Recheck Cond: ((id = ANY ('{2,3}'::integer[])) AND (id IS NOT NULL))
+ Recheck Cond: (id = ANY ('{2,3}'::integer[]))
Filter: ((is_flag IS NULL) OR (is_flag = 0))
-> Bitmap Index Scan on tab_with_flag_pkey
- Index Cond: ((id = ANY ('{2,3}'::integer[])) AND (id IS NOT
NULL))
+ Index Cond: (id = ANY ('{2,3}'::integer[]))
(6 rows)
DROP TABLE tab_with_flag;
@@ -6764,11 +6764,11 @@ reset self_join_search_limit;
CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int);
explain (verbose, costs off)
SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
- QUERY PLAN
-----------------------------------------------------------
+ QUERY PLAN
+------------------------------------------
Seq Scan on public.emp1 e2
Output: e2.id, e2.code, e2.id, e2.code
- Filter: ((e2.id IS NOT NULL) AND (e2.code <> e2.code))
+ Filter: (e2.code <> e2.code)
(3 rows)
-- We can remove the join even if we find the join can't duplicate rows and