Hi, I rebased the patch and fixed earlier problem.
On 03.09.2025 00:20, Alena Rybakina wrote:
I'll look at this. I need some time to consider it.
I'm working on this approach right now. I introduced mutator and made
the transformation if it is possible there but I need to fix some bugs.
From bea0cf815e2ac283a973deb31b2ca67bd66d7f8a Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybak...@postgrespro.ru>
Date: Fri, 5 Sep 2025 14:01:35 +0300
Subject: [PATCH] Enables pull-up of EXISTS subqueries that
contain INNER joins, unlocking join reordering and earlier filtering. OUTER
joins with outer references are safely excluded to preserve null-preserving
semantics.
To achieve this, introduce a mutator that performs a single conservative
pass over the subquery jointree and stops transformation if subquery
contains volatile quals, or OUTER joins with outer references, since
hoisting would break null-preserving behavior.
On the other hand, OUTER joins without such references remain intact.
Add IS NOT NULL guards on hoisted outer Vars to avoid redundant null elements
that obviously won't result after join operation.
Replace affected subquery quals with true.
---
src/backend/optimizer/plan/subselect.c | 611 +++++++++++---
src/backend/utils/misc/guc_tables.c | 11 +
src/backend/utils/misc/postgresql.conf.sample | 1 +
src/include/optimizer/optimizer.h | 1 +
src/test/regress/expected/subselect.out | 791 ++++++++++++++++++
src/test/regress/expected/updatable_views.out | 10 +-
src/test/regress/sql/subselect.sql | 443 ++++++++++
7 files changed, 1736 insertions(+), 132 deletions(-)
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index d71ed958e31..33fc19f7abc 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1439,6 +1439,239 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
return result;
}
+typedef struct HoistJoinQualsContext
+{
+ List *outer_clauses; /* collect hoisted clauses */
+ Relids observed_nulltest_vars;
+} HoistJoinQualsContext;
+
+static Node *
+preprocess_quals(Node *node)
+{
+ /*
+ * Run const-folding without planner context.
+ *
+ * IMPORTANT: Pass NULL as PlannerInfo here because we’re simplifying
+ * a *subquery’s* quals before its rtable has been merged with the
+ * parent. If we passed a non-NULL root, eval_const_expressions()
+ * could perform root-dependent transforms (e.g., fold NullTest on Var
+ * using var_is_nonnullable) against the *wrong* rangetable, risking
+ * out-of-bounds RTE access. See eval_const_expressions()’s contract:
+ * “root can be passed as NULL …” for exactly this use-case.
+ */
+ node = eval_const_expressions(NULL, node);
+ node = (Node *) canonicalize_qual((Expr *) node, false);
+
+ node = (Node *) make_ands_implicit((Expr *) node);
+
+ return node;
+}
+
+static NullTest *
+make_nulltest(Var *var, NullTestType type)
+{
+ NullTest *nulltest = makeNode(NullTest);
+ nulltest->arg = (Expr *) var;
+ nulltest->nulltesttype = type;
+ nulltest->argisrow = false;
+ nulltest->location = -1;
+
+ return nulltest;
+}
+
+static bool
+simplicity_check_walker(Node *node, void *ctx)
+{
+ if (node == NULL)
+ {
+ return false;
+ }
+ else if(IsA(node, Var))
+ return true;
+ else if(IsA(node, Query))
+ return query_tree_walker((Query *) node,
+ simplicity_check_walker,
+ (void*) ctx,
+ QTW_EXAMINE_RTES_BEFORE);
+
+ return expression_tree_walker(node, simplicity_check_walker,
+ (void *) ctx);
+}
+
+static List *
+generate_not_null_exprs(List *list_expr, Relids *observed_vars)
+{
+ ListCell *lc;
+ List *result = NIL;
+
+ foreach(lc, list_expr)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ if (IsA(node, OpExpr))
+ {
+ Node *larg = get_leftop(node);
+ Node *rarg = get_rightop(node);
+
+ if (IsA(larg, RelabelType))
+ larg = (Node *) ((RelabelType *) larg)->arg;
+
+ if (IsA(rarg, RelabelType))
+ rarg = (Node *) ((RelabelType *) rarg)->arg;
+
+ if(IsA(larg, Var))
+ {
+ Var *var = (Var *) larg;
+ if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
+ {
+ NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
+ result = lappend(result, nulltest);
+ *observed_vars = bms_add_member(*observed_vars, var->varno);
+ continue;
+ }
+ }
+
+ if(IsA(rarg, Var))
+ {
+ Var *var = (Var *) rarg;
+ if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
+ {
+ NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
+ result = lappend(result, nulltest);
+ *observed_vars = bms_add_member(*observed_vars, var->varno);
+ continue;
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+static Node *
+hoist_parent_quals_jointree_mutator(Node *jtnode, HoistJoinQualsContext *context)
+{
+ if (jtnode == NULL)
+ return NULL;
+
+ if (IsA(jtnode, RangeTblRef))
+ return jtnode; /* nothing to change */
+
+ if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ JoinExpr *newj = makeNode(JoinExpr);
+ ListCell *lc;
+ List *join_clauses = NIL;
+ Node *qual;
+ memcpy(newj, j, sizeof(JoinExpr));
+
+ /* Recurse into join inputs */
+ newj->larg = (Node *) hoist_parent_quals_jointree_mutator(j->larg, context);
+ newj->rarg = (Node *) hoist_parent_quals_jointree_mutator(j->rarg, context);
+
+ if(contain_volatile_functions(newj->quals) ||
+ newj->larg == NULL ||
+ newj->rarg == NULL)
+ return NULL;
+
+ qual = newj->quals;
+ qual = preprocess_quals(qual);
+
+ foreach(lc, (List *) qual)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ if (IsA(node, OpExpr))
+ {
+ if(simplicity_check_walker(get_leftop(node), NULL) &&
+ simplicity_check_walker(get_rightop(node), NULL))
+ {
+ join_clauses = lappend(join_clauses, node);
+ continue;
+ }
+ }
+ context->outer_clauses = lappend(context->outer_clauses, node);
+ }
+
+ /* Only touch INNER JOINs */
+ if ((j->jointype != JOIN_LEFT &&
+ j->jointype != JOIN_RIGHT &&
+ j->jointype != JOIN_FULL)) /* subquery vars */
+ {
+ List *null_tests;
+
+ if (join_clauses == NIL) /* subquery vars */
+ {
+ newj->quals = (Node *) makeBoolConst(true, false);
+ }
+ else if(join_clauses != NIL && contain_vars_of_level((Node *) join_clauses, 1))
+ {
+ null_tests = generate_not_null_exprs(join_clauses, &context->observed_nulltest_vars);
+ context->outer_clauses = list_concat(context->outer_clauses, null_tests);
+ context->outer_clauses = list_concat(context->outer_clauses, join_clauses);
+ newj->quals = (Node *) makeBoolConst(true, false);
+ }
+ else
+ {
+ newj->quals = (Node *) make_ands_explicit(join_clauses);
+ }
+ }
+ else
+ {
+ if (contain_vars_of_level(j->quals, 1))
+ return NULL;
+ }
+
+ return (Node *) newj;
+ }
+
+ if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ FromExpr *newf = makeNode(FromExpr);
+ ListCell *lc;
+ List *fromlist = NIL;
+
+ /* Recurse into fromlist */
+ memcpy(newf, f, sizeof(FromExpr));
+
+ /*
+ * Process children, if any of their jointree contains Vars of the
+ * parent query or quals of their JoinExpr contains volatile functions
+ * then exit
+ */
+ foreach(lc, newf->fromlist)
+ {
+ Node *fnode = hoist_parent_quals_jointree_mutator(lfirst(lc), context);
+
+ if (fnode == NULL)
+ return NULL;
+ fromlist = lappend(fromlist, fnode);
+ }
+
+ newf->fromlist = fromlist;
+
+ if(contain_volatile_functions(newf->quals))
+ return NULL;
+
+ if(newf->quals)
+ {
+ Node *qual = newf->quals;
+ /* Quals (WHERE clause) may still contain sublinks etc */
+ qual = preprocess_quals(qual);
+ context->outer_clauses = list_concat(context->outer_clauses, (List *) qual);
+ newf->quals = NULL;
+ }
+
+ return (Node *) newf;
+ }
+
+ return jtnode; /* quiet compiler */
+}
+
+bool pull_up_with_joins = true;
+
/*
* convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
*
@@ -1453,12 +1686,13 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
JoinExpr *result;
Query *parse = root->parse;
Query *subselect = (Query *) sublink->subselect;
- Node *whereClause;
+ Node *whereClause = NULL;
PlannerInfo subroot;
int rtoffset;
int varno;
Relids clause_varnos;
Relids upper_varnos;
+ List *newWhere = NIL;
Assert(sublink->subLinkType == EXISTS_SUBLINK);
@@ -1488,145 +1722,270 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
if (!simplify_EXISTS_query(root, subselect))
return NULL;
- /*
- * Separate out the WHERE clause. (We could theoretically also remove
- * top-level plain JOIN/ON clauses, but it's probably not worth the
- * trouble.)
- */
- whereClause = subselect->jointree->quals;
- subselect->jointree->quals = NULL;
- /*
- * The rest of the sub-select must not refer to any Vars of the parent
- * query. (Vars of higher levels should be okay, though.)
- */
- if (contain_vars_of_level((Node *) subselect, 1))
- return NULL;
+ if(pull_up_with_joins)
+ {
+ HoistJoinQualsContext hjq_context = {NIL, NULL};
- /*
- * On the other hand, the WHERE clause must contain some Vars of the
- * parent query, else it's not gonna be a join.
- */
- if (!contain_vars_of_level(whereClause, 1))
- return NULL;
- /*
- * We don't risk optimizing if the WHERE clause is volatile, either.
- */
- if (contain_volatile_functions(whereClause))
- return NULL;
+ /*
+ * Scan the rangetable for relation RTEs and retrieve the necessary
+ * catalog information for each relation. Using this information, clear
+ * the inh flag for any relation that has no children, collect not-null
+ * attribute numbers for any relation that has column not-null
+ * constraints, and expand virtual generated columns for any relation that
+ * contains them.
+ *
+ * Note: we construct up an entirely dummy PlannerInfo for use here. This
+ * is fine because only the "glob" and "parse" links will be used in this
+ * case.
+ *
+ * Note: we temporarily assign back the WHERE clause so that any virtual
+ * generated column references within it can be expanded. It should be
+ * separated out again afterward.
+ */
+ MemSet(&subroot, 0, sizeof(subroot));
+ subroot.type = T_PlannerInfo;
+ subroot.glob = root->glob;
+ subroot.parse = subselect;
+ subselect = preprocess_relation_rtes(&subroot);
+
+ subselect->jointree = (FromExpr * ) hoist_parent_quals_jointree_mutator((Node *) subselect->jointree, &hjq_context);
+
+ if(subselect->jointree == NULL || hjq_context.outer_clauses == NIL)
+ return NULL;
- /*
- * Scan the rangetable for relation RTEs and retrieve the necessary
- * catalog information for each relation. Using this information, clear
- * the inh flag for any relation that has no children, collect not-null
- * attribute numbers for any relation that has column not-null
- * constraints, and expand virtual generated columns for any relation that
- * contains them.
- *
- * Note: we construct up an entirely dummy PlannerInfo for use here. This
- * is fine because only the "glob" and "parse" links will be used in this
- * case.
- *
- * Note: we temporarily assign back the WHERE clause so that any virtual
- * generated column references within it can be expanded. It should be
- * separated out again afterward.
- */
- MemSet(&subroot, 0, sizeof(subroot));
- subroot.type = T_PlannerInfo;
- subroot.glob = root->glob;
- subroot.parse = subselect;
- subselect->jointree->quals = whereClause;
- subselect = preprocess_relation_rtes(&subroot);
+ newWhere = hjq_context.outer_clauses;
- /*
- * Now separate out the WHERE clause again.
- */
- whereClause = subselect->jointree->quals;
- subselect->jointree->quals = NULL;
+ bms_free(hjq_context.observed_nulltest_vars);
- /*
- * The subquery must have a nonempty jointree, but we can make it so.
- */
- replace_empty_jointree(subselect);
+ subselect->jointree->quals = NULL;
- /*
- * Prepare to pull up the sub-select into top range table.
- *
- * We rely here on the assumption that the outer query has no references
- * to the inner (necessarily true). Therefore this is a lot easier than
- * what pull_up_subqueries has to go through.
- *
- * In fact, it's even easier than what convert_ANY_sublink_to_join has to
- * do. The machinations of simplify_EXISTS_query ensured that there is
- * nothing interesting in the subquery except an rtable and jointree, and
- * even the jointree FromExpr no longer has quals. So we can just append
- * the rtable to our own and use the FromExpr in our jointree. But first,
- * adjust all level-zero varnos in the subquery to account for the rtable
- * merger.
- */
- rtoffset = list_length(parse->rtable);
- OffsetVarNodes((Node *) subselect, rtoffset, 0);
- OffsetVarNodes(whereClause, rtoffset, 0);
+ /*
+ * The subquery must have a nonempty jointree, but we can make it so.
+ */
+ replace_empty_jointree(subselect);
- /*
- * Upper-level vars in subquery will now be one level closer to their
- * parent than before; in particular, anything that had been level 1
- * becomes level zero.
- */
- IncrementVarSublevelsUp((Node *) subselect, -1, 1);
- IncrementVarSublevelsUp(whereClause, -1, 1);
+ /*
+ * Prepare to pull up the sub-select into top range table.
+ *
+ * We rely here on the assumption that the outer query has no references
+ * to the inner (necessarily true). Therefore this is a lot easier than
+ * what pull_up_subqueries has to go through.
+ *
+ * In fact, it's even easier than what convert_ANY_sublink_to_join has to
+ * do. The machinations of simplify_EXISTS_query ensured that there is
+ * nothing interesting in the subquery except an rtable and jointree, and
+ * even the jointree FromExpr no longer has quals. So we can just append
+ * the rtable to our own and use the FromExpr in our jointree. But first,
+ * adjust all level-zero varnos in the subquery to account for the rtable
+ * merger.
+ */
+ rtoffset = list_length(parse->rtable);
+ OffsetVarNodes((Node *) subselect, rtoffset, 0);
- /*
- * Now that the WHERE clause is adjusted to match the parent query
- * environment, we can easily identify all the level-zero rels it uses.
- * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
- * not.
- */
- clause_varnos = pull_varnos(root, whereClause);
- upper_varnos = NULL;
- varno = -1;
- while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
- {
- if (varno <= rtoffset)
- upper_varnos = bms_add_member(upper_varnos, varno);
- }
- bms_free(clause_varnos);
- Assert(!bms_is_empty(upper_varnos));
+ /*
+ * Upper-level vars in subquery will now be one level closer to their
+ * parent than before; in particular, anything that had been level 1
+ * becomes level zero.
+ */
+ IncrementVarSublevelsUp((Node *) subselect, -1, 1);
- /*
- * Now that we've got the set of upper-level varnos, we can make the last
- * check: only available_rels can be referenced.
- */
- if (!bms_is_subset(upper_varnos, available_rels))
- return NULL;
+ OffsetVarNodes((Node *) newWhere, rtoffset, 0);
+ IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
- /*
- * Now we can attach the modified subquery rtable to the parent. This also
- * adds subquery's RTEPermissionInfos into the upper query.
- */
- CombineRangeTables(&parse->rtable, &parse->rteperminfos,
- subselect->rtable, subselect->rteperminfos);
+ /*
+ * Now that the WHERE clause is adjusted to match the parent query
+ * environment, we can easily identify all the level-zero rels it uses.
+ * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
+ * not.
+ */
+ clause_varnos = pull_varnos(root, (Node *) newWhere);
+ upper_varnos = NULL;
+ varno = -1;
+ while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+ {
+ if (varno <= rtoffset)
+ upper_varnos = bms_add_member(upper_varnos, varno);
+ }
+ bms_free(clause_varnos);
- /*
- * And finally, build the JoinExpr node.
- */
- result = makeNode(JoinExpr);
- result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
- result->isNatural = false;
- result->larg = NULL; /* caller must fill this in */
- /* flatten out the FromExpr node if it's useless */
- if (list_length(subselect->jointree->fromlist) == 1)
- result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+ /*
+ * Now that we've got the set of upper-level varnos, we can make the last
+ * check: only available_rels can be referenced.
+ */
+ if (!bms_is_empty(upper_varnos) && !bms_is_subset(upper_varnos, available_rels))
+ return NULL;
+
+ /*
+ * Now we can attach the modified subquery rtable to the parent. This also
+ * adds subquery's RTEPermissionInfos into the upper query.
+ */
+ CombineRangeTables(&parse->rtable, &parse->rteperminfos,
+ subselect->rtable, subselect->rteperminfos);
+
+ /*
+ * And finally, build the JoinExpr node.
+ */
+ result = makeNode(JoinExpr);
+ result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+ result->isNatural = false;
+ result->larg = NULL; /* caller must fill this in */
+ /* flatten out the FromExpr node if it's useless */
+ if (list_length(subselect->jointree->fromlist) == 1)
+ result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+ else
+ result->rarg = (Node *) subselect->jointree;
+ result->usingClause = NIL;
+ result->join_using_alias = NULL;
+ result->alias = NULL;
+ result->rtindex = 0; /* we don't need an RTE for it */
+ result->quals = (Node *) make_ands_explicit(newWhere);
+
+ return result;
+ }
else
- result->rarg = (Node *) subselect->jointree;
- result->usingClause = NIL;
- result->join_using_alias = NULL;
- result->quals = whereClause;
- result->alias = NULL;
- result->rtindex = 0; /* we don't need an RTE for it */
+ {
+ /*
+ * Separate out the WHERE clause. (We could theoretically also remove
+ * top-level plain JOIN/ON clauses, but it's probably not worth the
+ * trouble.)
+ */
+ whereClause = subselect->jointree->quals;
+ subselect->jointree->quals = NULL;
- return result;
+ /*
+ * The rest of the sub-select must not refer to any Vars of the parent
+ * query. (Vars of higher levels should be okay, though.)
+ */
+ if (contain_vars_of_level((Node *) subselect, 1))
+ return NULL;
+
+ /*
+ * On the other hand, the WHERE clause must contain some Vars of the
+ * parent query, else it's not gonna be a join.
+ */
+ if (!contain_vars_of_level(whereClause, 1))
+ return NULL;
+
+ /*
+ * We don't risk optimizing if the WHERE clause is volatile, either.
+ */
+ if (contain_volatile_functions(whereClause))
+ return NULL;
+
+ /*
+ * Scan the rangetable for relation RTEs and retrieve the necessary
+ * catalog information for each relation. Using this information, clear
+ * the inh flag for any relation that has no children, collect not-null
+ * attribute numbers for any relation that has column not-null
+ * constraints, and expand virtual generated columns for any relation that
+ * contains them.
+ *
+ * Note: we construct up an entirely dummy PlannerInfo for use here. This
+ * is fine because only the "glob" and "parse" links will be used in this
+ * case.
+ *
+ * Note: we temporarily assign back the WHERE clause so that any virtual
+ * generated column references within it can be expanded. It should be
+ * separated out again afterward.
+ */
+ MemSet(&subroot, 0, sizeof(subroot));
+ subroot.type = T_PlannerInfo;
+ subroot.glob = root->glob;
+ subroot.parse = subselect;
+ subselect->jointree->quals = whereClause;
+ subselect = preprocess_relation_rtes(&subroot);
+
+ /*
+ * Now separate out the WHERE clause again.
+ */
+ whereClause = subselect->jointree->quals;
+ subselect->jointree->quals = NULL;
+
+ /*
+ * The subquery must have a nonempty jointree, but we can make it so.
+ */
+ replace_empty_jointree(subselect);
+
+ /*
+ * Prepare to pull up the sub-select into top range table.
+ *
+ * We rely here on the assumption that the outer query has no references
+ * to the inner (necessarily true). Therefore this is a lot easier than
+ * what pull_up_subqueries has to go through.
+ *
+ * In fact, it's even easier than what convert_ANY_sublink_to_join has to
+ * do. The machinations of simplify_EXISTS_query ensured that there is
+ * nothing interesting in the subquery except an rtable and jointree, and
+ * even the jointree FromExpr no longer has quals. So we can just append
+ * the rtable to our own and use the FromExpr in our jointree. But first,
+ * adjust all level-zero varnos in the subquery to account for the rtable
+ * merger.
+ */
+ rtoffset = list_length(parse->rtable);
+ OffsetVarNodes((Node *) subselect, rtoffset, 0);
+ OffsetVarNodes(whereClause, rtoffset, 0);
+
+ /*
+ * Upper-level vars in subquery will now be one level closer to their
+ * parent than before; in particular, anything that had been level 1
+ * becomes level zero.
+ */
+ IncrementVarSublevelsUp((Node *) subselect, -1, 1);
+ IncrementVarSublevelsUp(whereClause, -1, 1);
+
+ /*
+ * Now that the WHERE clause is adjusted to match the parent query
+ * environment, we can easily identify all the level-zero rels it uses.
+ * The ones <= rtoffset belong to the upper query; the ones > rtoffset do
+ * not.
+ */
+ clause_varnos = pull_varnos(root, whereClause);
+ upper_varnos = NULL;
+ varno = -1;
+ while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
+ {
+ if (varno <= rtoffset)
+ upper_varnos = bms_add_member(upper_varnos, varno);
+ }
+ bms_free(clause_varnos);
+ Assert(!bms_is_empty(upper_varnos));
+
+ /*
+ * Now that we've got the set of upper-level varnos, we can make the last
+ * check: only available_rels can be referenced.
+ */
+ if (!bms_is_subset(upper_varnos, available_rels))
+ return NULL;
+
+ /*
+ * Now we can attach the modified subquery rtable to the parent. This also
+ * adds subquery's RTEPermissionInfos into the upper query.
+ */
+ CombineRangeTables(&parse->rtable, &parse->rteperminfos,
+ subselect->rtable, subselect->rteperminfos);
+
+ /*
+ * And finally, build the JoinExpr node.
+ */
+ result = makeNode(JoinExpr);
+ result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+ result->isNatural = false;
+ result->larg = NULL; /* caller must fill this in */
+ /* flatten out the FromExpr node if it's useless */
+ if (list_length(subselect->jointree->fromlist) == 1)
+ result->rarg = (Node *) linitial(subselect->jointree->fromlist);
+ else
+ result->rarg = (Node *) subselect->jointree;
+ result->usingClause = NIL;
+ result->join_using_alias = NULL;
+ result->quals = whereClause;
+ result->alias = NULL;
+ result->rtindex = 0; /* we don't need an RTE for it */
+
+ return result;
+ }
}
/*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index f137129209f..0a6ea94c320 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1060,6 +1060,17 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"pull_up_with_joins", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enables the planner's use of exists pull-up with join expressions."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &pull_up_with_joins,
+ true,
+ NULL, NULL, NULL
+ },
+
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index a9d8293474a..c43f2a1a2d2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -427,6 +427,7 @@
#enable_tidscan = on
#enable_group_by_reordering = on
#enable_distinct_reordering = on
+#pull_up_with_joins = on
#enable_self_join_elimination = on
# - Planner Cost Constants -
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 37bc13c2cbd..fa8db1362c7 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -112,6 +112,7 @@ typedef enum
extern PGDLLIMPORT int debug_parallel_query;
extern PGDLLIMPORT bool parallel_leader_participation;
extern PGDLLIMPORT bool enable_distinct_reordering;
+extern PGDLLIMPORT bool pull_up_with_joins;
extern struct PlannedStmt *planner(Query *parse, const char *query_string,
int cursorOptions,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 0563d0cd5a1..236a6bbd086 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1019,6 +1019,797 @@ where exists (
where road.name = ss.f1 );
rollback;
--
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tb
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON 1 = 1
+ WHERE ta.id = tc.id
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tb
+(6 rows)
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND ta.id = tb.id
+);
+ QUERY PLAN
+------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (tc.id = ta.id)
+ -> Hash Join
+ Hash Cond: (tb.id = tc.id)
+ -> Seq Scan on tb
+ -> Hash
+ -> Seq Scan on tc
+ -> Hash
+ -> Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tb.aval = tc.aid
+ AND tb.aval = ta1.id
+);
+ QUERY PLAN
+----------------------------------------------------
+ Hash Join
+ Hash Cond: (ta1.id = tb.aval)
+ -> Seq Scan on ta ta1
+ -> Hash
+ -> Unique
+ -> Merge Join
+ Merge Cond: (tb.aval = tc.aid)
+ -> Sort
+ Sort Key: tb.aval
+ -> Seq Scan on tb
+ -> Sort
+ Sort Key: tc.aid
+ -> Seq Scan on tc
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+ SELECT 1
+ FROM tb tb1
+ JOIN tc ON ta.id = tb.id
+);
+ QUERY PLAN
+------------------------------------
+ Nested Loop Semi Join
+ -> Hash Join
+ Hash Cond: (ta.id = tb.id)
+ -> Seq Scan on ta
+ -> Hash
+ -> Seq Scan on tb
+ -> Nested Loop
+ -> Seq Scan on tb tb1
+ -> Materialize
+ -> Seq Scan on tc
+(10 rows)
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND ta.id = 1
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Index Only Scan using ta_pkey on ta
+ Index Cond: (id = 1)
+ -> Nested Loop
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = 1)
+ -> Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND tb.id = 1
+);
+ QUERY PLAN
+-------------------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (tc.id = ta.id)
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = 1)
+ -> Seq Scan on tc
+ -> Hash
+ -> Seq Scan on ta
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ RIGHT JOIN tc ON ta.id = tc.id
+ WHERE ta.val = 1
+);
+ QUERY PLAN
+----------------------------------------------
+ Seq Scan on ta
+ Filter: EXISTS(SubPlan 1)
+ SubPlan 1
+ -> Result
+ One-Time Filter: (ta.val = 1)
+ -> Nested Loop Left Join
+ Join Filter: (ta.id = tc.id)
+ -> Seq Scan on tc
+ -> Materialize
+ -> Seq Scan on tb
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND tb.aval = ANY ('{1}'::int[])
+);
+ QUERY PLAN
+-------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: (ta.id = tb.id)
+ -> Seq Scan on ta
+ -> Hash
+ -> HashAggregate
+ Group Key: tb.id
+ -> Nested Loop
+ -> Seq Scan on tc
+ -> Materialize
+ -> Seq Scan on tb
+ Filter: (aval = ANY ('{1}'::integer[]))
+(11 rows)
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tb ON ta.id = ta1.id
+ AND ta1.val = 1
+ WHERE EXISTS (
+ SELECT 1
+ FROM ta ta2
+ WHERE ta2.id = ta1.id
+ )
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta ta2
+ Filter: (val = 1)
+ -> Nested Loop
+ -> Index Only Scan using ta_pkey on ta
+ Index Cond: (id = ta2.id)
+ -> Seq Scan on tb
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tb ON ta.val = ta1.id
+ AND ta1.id = 1
+ WHERE EXISTS (
+ SELECT 1
+ FROM ta ta2
+ WHERE ta2.id = ta.id
+ )
+);
+ QUERY PLAN
+-----------------------------------------------
+ Nested Loop Semi Join
+ -> Index Only Scan using ta_pkey on ta ta1
+ Index Cond: (id = 1)
+ -> Nested Loop
+ -> Seq Scan on tb
+ -> Materialize
+ -> Seq Scan on ta ta2
+ Filter: (val = 1)
+(8 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE ta.id = tb.id
+ AND EXISTS (
+ SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id
+ AND tc.aid + tb.aval > 0
+ )
+);
+ QUERY PLAN
+-----------------------------------------------------
+ Hash Semi Join
+ Hash Cond: (ta.id = tc.id)
+ -> Seq Scan on ta
+ -> Hash
+ -> Hash Join
+ Hash Cond: (tb.id = tc.id)
+ Join Filter: ((tc.aid + tb.aval) > 0)
+ -> Seq Scan on tb
+ -> Hash
+ -> Seq Scan on tc
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE ta.id = tb.id
+ AND EXISTS (
+ SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id
+ AND tc.aid + ta.val > 0
+ )
+);
+ QUERY PLAN
+----------------------------------------
+ Hash Join
+ Hash Cond: (ta.id = tb.id)
+ Join Filter: EXISTS(SubPlan 1)
+ -> Seq Scan on ta
+ -> Hash
+ -> Seq Scan on tb
+ SubPlan 1
+ -> Index Scan using tc_pkey on tc
+ Index Cond: (id = tb.id)
+ Filter: ((aid + ta.val) > 0)
+(10 rows)
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND COALESCE(ta.is_active, true)
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ Filter: COALESCE(is_active, true)
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tc ON ta.id = tb.id
+ AND COALESCE(ta.is_active, true)
+);
+ QUERY PLAN
+-------------------------------------------------------
+ Merge Join
+ Merge Cond: (tb.id = ta.id)
+ -> Index Only Scan using tb_pkey on tb
+ -> Unique
+ -> Nested Loop
+ -> Index Scan using ta_pkey on ta
+ Filter: COALESCE(is_active, true)
+ -> Materialize
+ -> Seq Scan on tc
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND CASE
+ WHEN ta.is_active THEN true
+ ELSE false
+ END = true
+);
+ QUERY PLAN
+--------------------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ Filter: CASE WHEN is_active THEN true ELSE false END
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tc ON ta.id = tb.id
+ AND CASE
+ WHEN ta.is_active THEN true
+ ELSE false
+ END = true
+);
+ QUERY PLAN
+--------------------------------------------------------------------------
+ Merge Join
+ Merge Cond: (tb.id = ta.id)
+ -> Index Only Scan using tb_pkey on tb
+ -> Unique
+ -> Nested Loop
+ -> Index Scan using ta_pkey on ta
+ Filter: CASE WHEN is_active THEN true ELSE false END
+ -> Materialize
+ -> Seq Scan on tc
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND ta.is_active
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ Filter: is_active
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND ta.is_active IS NOT NULL
+);
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ Filter: (is_active IS NOT NULL)
+ -> Nested Loop
+ -> Index Only Scan using tb_pkey on tb
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tc
+(7 rows)
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ RIGHT JOIN tc ON ta.id = tc.id
+);
+ QUERY PLAN
+----------------------------------------
+ Seq Scan on ta
+ Filter: EXISTS(SubPlan 1)
+ SubPlan 1
+ -> Nested Loop Left Join
+ Join Filter: (ta.id = tc.id)
+ -> Seq Scan on tc
+ -> Materialize
+ -> Seq Scan on tb
+(8 rows)
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tc.id = tb.id
+);
+ QUERY PLAN
+---------------------------------------------------
+ Result
+ One-Time Filter: (InitPlan 1).col1
+ InitPlan 1
+ -> Nested Loop
+ -> Seq Scan on tb
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = tb.id)
+ -> Seq Scan on ta
+(8 rows)
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+UNION ALL
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+UNION ALL
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+UNION ALL
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id AND
+ EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id)
+ );
+ QUERY PLAN
+------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (tc.id = ta.id)
+ -> Hash Join
+ Hash Cond: (tb.id = tc.id)
+ -> Seq Scan on tb
+ -> Hash
+ -> Seq Scan on tc
+ -> Hash
+ -> Seq Scan on ta
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id AND
+ EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tc.id = ta.id)
+ );
+ QUERY PLAN
+------------------------------------
+ Hash Join
+ Hash Cond: (tc.id = tb.id)
+ -> Hash Join
+ Hash Cond: (tc.id = ta.id)
+ -> Seq Scan on tc
+ -> Hash
+ -> Seq Scan on ta
+ -> Hash
+ -> Seq Scan on tb
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id
+ AND EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tb.id = ta.id)
+ );
+ QUERY PLAN
+--------------------------------------------
+ Hash Join
+ Hash Cond: (tb.id = ta.id)
+ Join Filter: EXISTS(SubPlan 1)
+ -> Seq Scan on tb
+ -> Hash
+ -> Seq Scan on ta
+ SubPlan 1
+ -> Result
+ One-Time Filter: (tb.id = ta.id)
+ -> Seq Scan on tc
+(10 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ join tc on tc.id = ta.id
+ AND EXISTS (
+ SELECT 1
+ FROM td
+ WHERE td.id = ta.id)
+ );
+ QUERY PLAN
+-------------------------------------------------------
+ Hash Join
+ Hash Cond: (ta.id = td.id)
+ -> Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tb
+ -> Hash
+ -> HashAggregate
+ Group Key: td.id
+ -> Seq Scan on td
+(12 rows)
+
+explain (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ join tc on tc.id = ta.id
+ AND EXISTS (
+ SELECT 1
+ FROM td
+ WHERE tb.id = ta.id)
+ );
+ QUERY PLAN
+--------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on ta
+ -> Nested Loop
+ Join Filter: EXISTS(SubPlan 1)
+ -> Index Only Scan using tc_pkey on tc
+ Index Cond: (id = ta.id)
+ -> Seq Scan on tb
+ SubPlan 1
+ -> Result
+ One-Time Filter: (tb.id = ta.id)
+ -> Seq Scan on td
+(11 rows)
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tb t2 ON t2.id = t1.id
+ WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+ WHERE EXISTS (
+ SELECT 1
+ FROM te t4
+ WHERE t4.tc_id = t3.tc_id
+ AND t4.val = t2.aval
+ ) = EXISTS (
+ SELECT 1
+ FROM tc t5
+ WHERE t5.id = t3.id
+ )
+ )
+);
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------
+ Merge Semi Join
+ Merge Cond: (t1.id = t2.id)
+ -> Index Scan using ta_pkey on ta t1
+ -> Nested Loop Semi Join
+ Join Filter: ((ANY ((t3.tc_id = (hashed SubPlan 2).col1) AND (t2.aval = (hashed SubPlan 2).col2))) = (ANY (t3.id = (hashed SubPlan 4).col1)))
+ -> Index Scan using tb_pkey on tb t2
+ -> Materialize
+ -> Seq Scan on td t3
+ Filter: (tc_id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+ SubPlan 2
+ -> Seq Scan on te t4
+ SubPlan 4
+ -> Seq Scan on tc t5
+(13 rows)
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tc.id = tb.id
+ AND tb.id = ta.id
+ JOIN td ON td.id = tc.id
+);
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Hash Right Semi Join (cost=181.20..219.35 rows=1100 width=9)
+ Hash Cond: (td.id = ta.id)
+ -> Hash Join (cost=121.70..150.02 rows=1200 width=12)
+ Hash Cond: (td.id = tc.id)
+ -> Hash Join (cost=60.85..86.01 rows=1200 width=8)
+ Hash Cond: (td.id = tb.id)
+ -> Seq Scan on td (cost=0.00..22.00 rows=1200 width=4)
+ -> Hash (cost=32.60..32.60 rows=2260 width=4)
+ -> Seq Scan on tb (cost=0.00..32.60 rows=2260 width=4)
+ -> Hash (cost=32.60..32.60 rows=2260 width=4)
+ -> Seq Scan on tc (cost=0.00..32.60 rows=2260 width=4)
+ -> Hash (cost=32.00..32.00 rows=2200 width=9)
+ -> Seq Scan on ta (cost=0.00..32.00 rows=2200 width=9)
+(13 rows)
+
+DROP TABLE td, te;
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+ FROM generate_series(1, 5) AS g(id);
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+ FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+ FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+ FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+ FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tst2 t2 ON t2.tst1_id = t1.id
+ WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+ WHERE EXISTS (
+ SELECT 1
+ FROM tst4 t4
+ WHERE t4.tst3_id = t3.id
+ AND t4.type_id = t2.type_id
+ ) = EXISTS (
+ SELECT 1
+ FROM tst5 t5
+ WHERE t5.tst3_id = t3.id
+ )
+ )
+);
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (t2.tst1_id = t1.id)
+ -> Nested Loop Semi Join
+ Join Filter: (EXISTS(SubPlan 1) = EXISTS(SubPlan 3))
+ -> Seq Scan on tst2 t2
+ -> Materialize
+ -> Seq Scan on tst3 t3
+ Filter: (id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[]))
+ SubPlan 1
+ -> Seq Scan on tst4 t4
+ Filter: ((tst3_id = t3.id) AND (type_id = t2.type_id))
+ SubPlan 3
+ -> Seq Scan on tst5 t5
+ Filter: (tst3_id = t3.id)
+ -> Hash
+ -> Seq Scan on tst1 t1
+ Filter: (id IS NOT NULL)
+(17 rows)
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
-- Test case for sublinks pushed down into subselects via join alias expansion
--
select
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 095df0a670c..d0762c1299e 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -3177,14 +3177,12 @@ EXPLAIN (costs off) INSERT INTO rw_view1 VALUES (2, 'New row 2');
One-Time Filter: ((InitPlan 1).col1 IS NOT TRUE)
Update on base_tbl
- InitPlan 1
- -> Index Only Scan using base_tbl_pkey on base_tbl t
- Index Cond: (id = 2)
- -> Result
- One-Time Filter: (InitPlan 1).col1
+ -> Nested Loop Semi Join
-> Index Scan using base_tbl_pkey on base_tbl
Index Cond: (id = 2)
-(15 rows)
+ -> Index Scan using base_tbl_pkey on base_tbl t
+ Index Cond: (id = 2)
+(13 rows)
INSERT INTO rw_view1 VALUES (2, 'New row 2');
SELECT * FROM base_tbl;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index a6d276a115b..ca31e47c973 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -507,6 +507,449 @@ where exists (
rollback;
--
+-- Test case for exist sublink where we can consider some undependent expression
+-- with outer link
+--
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON 1 = 1
+ WHERE ta.id = tc.id
+);
+
+-- Join compound expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND ta.id = tb.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tb.aval = tc.aid
+ AND tb.aval = ta1.id
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+JOIN tb ON true
+WHERE EXISTS (
+ SELECT 1
+ FROM tb tb1
+ JOIN tc ON ta.id = tb.id
+);
+
+-- Compound expression with const type or other type of expressions
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND ta.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tc.id
+ AND tb.id = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ RIGHT JOIN tc ON ta.id = tc.id
+ WHERE ta.val = 1
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND tb.aval = ANY ('{1}'::int[])
+);
+
+-- Exists SubLink expression within expression
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tb ON ta.id = ta1.id
+ AND ta1.val = 1
+ WHERE EXISTS (
+ SELECT 1
+ FROM ta ta2
+ WHERE ta2.id = ta1.id
+ )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta ta1
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tb ON ta.val = ta1.id
+ AND ta1.id = 1
+ WHERE EXISTS (
+ SELECT 1
+ FROM ta ta2
+ WHERE ta2.id = ta.id
+ )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE ta.id = tb.id
+ AND EXISTS (
+ SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id
+ AND tc.aid + tb.aval > 0
+ )
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE ta.id = tb.id
+ AND EXISTS (
+ SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id
+ AND tc.aid + ta.val > 0
+ )
+);
+
+-- Check with NULL and NOT NULL expressions
+ALTER TABLE ta ADD COLUMN is_active bool;
+UPDATE ta SET is_active = true;
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tc ON ta.id = tb.id
+ AND COALESCE(ta.is_active, true)
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND CASE
+ WHEN ta.is_active THEN true
+ ELSE false
+ END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM tb
+WHERE EXISTS (
+ SELECT 1
+ FROM ta
+ JOIN tc ON ta.id = tb.id
+ AND CASE
+ WHEN ta.is_active THEN true
+ ELSE false
+ END = true
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND ta.is_active
+);
+
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON ta.id = tb.id
+ AND ta.is_active IS NOT NULL
+);
+
+
+-- Disabled pull up because it is applcapable for INNER JOIN connection
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ RIGHT JOIN tc ON ta.id = tc.id
+);
+
+-- Disable pull-up due to lack of the outer var
+EXPLAIN (COSTS OFF)
+SELECT 1
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tc.id = tb.id
+);
+
+CREATE TABLE td (id int, tc_id bytea, val int);
+
+INSERT INTO td
+SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val
+FROM generate_series(1, 25) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val
+FROM generate_series(26, 50) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val
+FROM generate_series(51, 75) AS g(id)
+
+UNION ALL
+
+SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val
+FROM generate_series(76, 100) AS g(id);
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id AND
+ EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tc.id = tb.id)
+ );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id AND
+ EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tc.id = ta.id)
+ );
+
+EXPLAIN (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ WHERE tb.id = ta.id
+ AND EXISTS
+ (SELECT 1
+ FROM tc
+ WHERE tb.id = ta.id)
+ );
+
+explain (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ join tc on tc.id = ta.id
+ AND EXISTS (
+ SELECT 1
+ FROM td
+ WHERE td.id = ta.id)
+ );
+
+explain (COSTS OFF)
+ SELECT ta.id
+ FROM ta
+ WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ join tc on tc.id = ta.id
+ AND EXISTS (
+ SELECT 1
+ FROM td
+ WHERE tb.id = ta.id)
+ );
+
+CREATE TABLE te (id int, tc_id bytea, val int);
+INSERT INTO te SELECT * FROM td;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM ta t1
+WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tb t2 ON t2.id = t1.id
+ WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea)
+ WHERE EXISTS (
+ SELECT 1
+ FROM te t4
+ WHERE t4.tc_id = t3.tc_id
+ AND t4.val = t2.aval
+ ) = EXISTS (
+ SELECT 1
+ FROM tc t5
+ WHERE t5.id = t3.id
+ )
+ )
+);
+
+EXPLAIN
+SELECT ta.*
+FROM ta
+WHERE EXISTS (
+ SELECT 1
+ FROM tb
+ JOIN tc ON tc.id = tb.id
+ AND tb.id = ta.id
+ JOIN td ON td.id = tc.id
+);
+
+DROP TABLE td, te;
+
+CREATE TABLE tst1 (id int, mes varchar(20));
+CREATE TABLE tst2 (id int, tst1_id int, type_id int);
+CREATE TABLE tst3 (id bytea);
+CREATE TABLE tst4 (id int, tst3_id bytea, type_id int);
+CREATE TABLE tst5 (id int, tst3_id bytea, type_id int);
+
+INSERT INTO tst1 VALUES (1, 'test1');
+INSERT INTO tst1 VALUES (2, 'test2');
+INSERT INTO tst1 VALUES (3, 'test3');
+INSERT INTO tst2 VALUES (1, 2, 7);
+INSERT INTO tst2 VALUES (1, 2, 6);
+INSERT INTO tst2 VALUES (2, 3, 7);
+
+INSERT INTO tst3
+SELECT ('Test' || g.id)::bytea AS id
+ FROM generate_series(1, 5) AS g(id);
+
+INSERT INTO tst4
+(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id
+ FROM generate_series(1, 25) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id
+ FROM generate_series(26, 50) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id
+ FROM generate_series(51, 75) AS g(id))
+UNION ALL
+(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id
+ FROM generate_series(76, 100) AS g(id));
+INSERT INTO tst5 SELECT * FROM tst4;
+
+VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5;
+
+-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query
+-- and t2.type-id is the reference to grandparent query
+EXPLAIN (COSTS OFF)
+SELECT t1.*
+FROM tst1 t1
+WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tst2 t2 ON t2.tst1_id = t1.id
+ WHERE EXISTS (
+ SELECT 1
+ FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL
+ JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea)
+ WHERE EXISTS (
+ SELECT 1
+ FROM tst4 t4
+ WHERE t4.tst3_id = t3.id
+ AND t4.type_id = t2.type_id
+ ) = EXISTS (
+ SELECT 1
+ FROM tst5 t5
+ WHERE t5.tst3_id = t3.id
+ )
+ )
+);
+
+DROP TABLE tst1, tst2, tst3, tst4, tst5;
+
-- Test case for sublinks pushed down into subselects via join alias expansion
--
--
2.34.1