Here is a version that compiles. -- Alexander Kuzmenkov Postgres Professional: http://www.postgrespro.com The Russian Postgres Company
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 3bb91c9..62d46a1 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -166,10 +166,13 @@ _equalVar(const Var *a, const Var *b) COMPARE_SCALAR_FIELD(vartypmod); COMPARE_SCALAR_FIELD(varcollid); COMPARE_SCALAR_FIELD(varlevelsup); - COMPARE_SCALAR_FIELD(varnoold); - COMPARE_SCALAR_FIELD(varoattno); COMPARE_LOCATION_FIELD(location); + /* + * varnoold/varoattno are used only for debugging and may differ even + * when the variables are logically the same. + */ + return true; } diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index f295558..fb07ca0 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -2964,7 +2964,8 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, * relation_has_unique_index_for * Determine whether the relation provably has at most one row satisfying * a set of equality conditions, because the conditions constrain all - * columns of some unique index. + * columns of some unique index. If index_info is not null, it is set to + * point to a new UniqueIndexInfo containing the index and conditions. * * The conditions can be represented in either or both of two ways: * 1. A list of RestrictInfo nodes, where the caller has already determined @@ -2985,7 +2986,8 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, - List *exprlist, List *oprlist) + List *exprlist, List *oprlist, + UniqueIndexInfo **index_info) { ListCell *ic; @@ -3041,6 +3043,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, { IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic); int c; + List *matched_restrictlist = NIL; /* * If the index is not unique, or not immediately enforced, or if it's @@ -3089,6 +3092,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, if (match_index_to_operand(rexpr, c, ind)) { matched = true; /* column is unique */ + matched_restrictlist = lappend(matched_restrictlist, rinfo); break; } } @@ -3131,7 +3135,22 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, /* Matched all columns of this index? */ if (c == ind->ncolumns) + { + if (index_info != NULL) + { + /* This may be called in GEQO memory context. */ + MemoryContext oldContext = MemoryContextSwitchTo(root->planner_cxt); + *index_info = palloc(sizeof(UniqueIndexInfo)); + (*index_info)->index = ind; + (*index_info)->clauses = list_copy(matched_restrictlist); + MemoryContextSwitchTo(oldContext); + } + if (matched_restrictlist) + list_free(matched_restrictlist); return true; + } + if (matched_restrictlist) + list_free(matched_restrictlist); } return false; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 642f951..2edcf22 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -176,7 +176,8 @@ add_paths_to_joinrel(PlannerInfo *root, innerrel, JOIN_INNER, restrictlist, - false); + false, + NULL /*index_info*/); break; default: extra.inner_unique = innerrel_is_unique(root, @@ -185,7 +186,8 @@ add_paths_to_joinrel(PlannerInfo *root, innerrel, jointype, restrictlist, - false); + false, + NULL /*index_info*/); break; } diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 0e73f9c..2bb3a10 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,12 +22,15 @@ */ #include "postgres.h" +#include "catalog/pg_class.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" +#include "optimizer/predtest.h" +#include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" #include "optimizer/var.h" #include "utils/lsyscache.h" @@ -39,14 +42,15 @@ static void remove_rel_from_query(PlannerInfo *root, int relid, static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, - List *clause_list); + List *clause_list, UniqueIndexInfo **info); static Oid distinct_col_search(int colno, List *colnos, List *opids); static bool is_innerrel_unique_for(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist); + List *restrictlist, + UniqueIndexInfo **info); /* @@ -58,7 +62,7 @@ static bool is_innerrel_unique_for(PlannerInfo *root, * data structures that have to be updated are accessible via "root". */ List * -remove_useless_joins(PlannerInfo *root, List *joinlist) +remove_useless_left_joins(PlannerInfo *root, List *joinlist) { ListCell *lc; @@ -162,7 +166,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) int innerrelid; RelOptInfo *innerrel; Relids joinrelids; - List *clause_list = NIL; ListCell *l; int attroff; @@ -238,67 +241,24 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) } /* - * Search for mergejoinable clauses that constrain the inner rel against - * either the outer rel or a pseudoconstant. If an operator is - * mergejoinable then it behaves like equality for some btree opclass, so - * it's what we want. The mergejoinability test also eliminates clauses - * containing volatile functions, which we couldn't depend on. + * Check for pushed-down clauses referencing the inner rel. If there is + * such a clause then join removal has to be disallowed. We have to + * check this despite the previous attr_needed checks because of the + * possibility of pushed-down clauses referencing the rel. */ foreach(l, innerrel->joininfo) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - - /* - * If it's not a join clause for this outer join, we can't use it. - * Note that if the clause is pushed-down, then it is logically from - * above the outer join, even if it references no other rels (it might - * be from WHERE, for example). - */ - if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) - { - /* - * If such a clause actually references the inner rel then join - * removal has to be disallowed. We have to check this despite - * the previous attr_needed checks because of the possibility of - * pushed-down clauses referencing the rel. - */ - if (bms_is_member(innerrelid, restrictinfo->clause_relids)) + if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids) + && bms_is_member(innerrel->relid, restrictinfo->clause_relids)) return false; - continue; /* else, ignore; not useful here */ - } - - /* Ignore if it's not a mergejoinable clause */ - if (!restrictinfo->can_join || - restrictinfo->mergeopfamilies == NIL) - continue; /* not mergejoinable */ - - /* - * Check if clause has the form "outer op inner" or "inner op outer", - * and if so mark which side is inner. - */ - if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, - innerrel->relids)) - continue; /* no good for these input relations */ - - /* OK, add to list */ - clause_list = lappend(clause_list, restrictinfo); } - /* - * Now that we have the relevant equality join clauses, try to prove the - * innerrel distinct. - */ - if (rel_is_distinct_for(root, innerrel, clause_list)) - return true; - - /* - * Some day it would be nice to check for other methods of establishing - * distinctness. - */ - return false; + return is_innerrel_unique_for(root, joinrelids, sjinfo->min_lefthand, + innerrel, sjinfo->jointype, innerrel->joininfo, + NULL /*unique_index*/); } - /* * Remove the target relid from the planner's data structures, having * determined that there is no need to include it in the query. @@ -568,7 +528,7 @@ reduce_unique_semijoins(PlannerInfo *root) /* Test whether the innerrel is unique for those clauses. */ if (!innerrel_is_unique(root, joinrelids, sjinfo->min_lefthand, innerrel, - JOIN_SEMI, restrictlist, true)) + JOIN_SEMI, restrictlist, true, NULL /*index_info*/)) continue; /* OK, remove the SpecialJoinInfo from the list. */ @@ -643,9 +603,13 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) * Note that the passed-in clause_list may be destructively modified! This * is OK for current uses, because the clause_list is built by the caller for * the sole purpose of passing to this function. + * + * If unique_index is not null, it is set to point to the index that guarantees + * uniqueness for a base relation. */ static bool -rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list, + UniqueIndexInfo **index_info) { /* * We could skip a couple of tests here if we assume all callers checked @@ -661,8 +625,8 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) * relation_has_unique_index_for automatically adds any usable * restriction clauses for the rel, so we needn't do that here. */ - if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) - return true; + return relation_has_unique_index_for(root, rel, clause_list, NIL, NIL, + index_info); } else if (rel->rtekind == RTE_SUBQUERY) { @@ -966,6 +930,10 @@ distinct_col_search(int colno, List *colnos, List *opids) * heuristic about whether to cache negative answers; it should be "true" * if making an inquiry that is not part of the normal bottom-up join search * sequence. + * + * If index_info_out is not null, it is set to point to a new UniqueIndexInfo + * allocated in root memory context, that describes the index that guarantees + * uniqueness. */ bool innerrel_is_unique(PlannerInfo *root, @@ -974,12 +942,23 @@ innerrel_is_unique(PlannerInfo *root, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, - bool force_cache) + bool force_cache, + UniqueIndexInfo **index_info_out) { MemoryContext old_context; ListCell *lc; + UniqueIndexInfo *index_info; + + if (index_info_out) + *index_info_out = NULL; - /* Certainly can't prove uniqueness when there are no joinclauses */ + /* + * It is possible to prove uniqueness even in the absence of joinclauses, + * just from baserestrictinfos alone. However, in these cases the inner + * relation returns one row at most, so join removal won't give much + * benefit. It seems better to save some planning time by ignoring these + * cases. + */ if (restrictlist == NIL) return false; @@ -999,10 +978,14 @@ innerrel_is_unique(PlannerInfo *root, */ foreach(lc, innerrel->unique_for_rels) { - Relids unique_for_rels = (Relids) lfirst(lc); + Relids unique_for_rels = (Relids) linitial(lfirst(lc)); if (bms_is_subset(unique_for_rels, outerrelids)) + { + if (index_info_out) + *index_info_out = lsecond(lfirst(lc)); return true; /* Success! */ + } } /* @@ -1019,7 +1002,7 @@ innerrel_is_unique(PlannerInfo *root, /* No cached information, so try to make the proof. */ if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel, - jointype, restrictlist)) + jointype, restrictlist, &index_info)) { /* * Cache the positive result for future probes, being sure to keep it @@ -1033,9 +1016,12 @@ innerrel_is_unique(PlannerInfo *root, */ old_context = MemoryContextSwitchTo(root->planner_cxt); innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, - bms_copy(outerrelids)); + list_make2(bms_copy(outerrelids), index_info)); MemoryContextSwitchTo(old_context); + if (index_info_out) + *index_info_out = index_info; + return true; /* Success! */ } else @@ -1081,7 +1067,8 @@ is_innerrel_unique_for(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist) + List *restrictlist, + UniqueIndexInfo **index_info) { List *clause_list = NIL; ListCell *lc; @@ -1123,5 +1110,776 @@ is_innerrel_unique_for(PlannerInfo *root, } /* Let rel_is_distinct_for() do the hard work */ - return rel_is_distinct_for(root, innerrel, clause_list); + return rel_is_distinct_for(root, innerrel, clause_list, index_info); +} + +typedef struct +{ + Index oldRelid; + Index newRelid; + bool found; +} ChangeVarnoContext; + +static bool +change_varno_walker(Node *node, ChangeVarnoContext *context) +{ + if (node == NULL) + return false; + + if (IsA(node, Var) && ((Var *) node)->varno == context->oldRelid) + { + ((Var *) node)->varno = context->newRelid; + context->found = true; + return false; + } + + return expression_tree_walker(node, change_varno_walker, context); +} + +/* + * For all Vars in the expression that have varno = oldRelid, set + * varno = newRelid. + * Return whether any changes were made. + */ +static bool +change_varno(Expr *expr, Index oldRelid, Index newRelid) +{ + ChangeVarnoContext context; + context.oldRelid = oldRelid; + context.newRelid = newRelid; + context.found = false; + change_varno_walker((Node *) expr, &context); + return context.found; +} + +/* + * Substitute newId for oldId in relids. + */ +static void +change_relid(Relids *relids, Index oldId, Index newId) +{ + if (bms_is_member(oldId, *relids)) + *relids = bms_add_member(bms_del_member(*relids, oldId), newId); +} + +/* + * Move EC members from the removed relation to the remaining one, + * removing duplicates. + */ +static void +move_ec_members(EquivalenceClass *ec, Index toRemove, Index toKeep) +{ + ListCell *prev = NULL; + ListCell *cell = NULL; + ListCell *next = list_head(ec->ec_members); + + while (next) + { + EquivalenceMember *em; + + prev = cell; + cell = next; + next = lnext(next); + + em = lfirst(cell); + if (change_varno(em->em_expr, toRemove, toKeep)) + { + /* + * If we transferred the equivalence member to another relation, + * check that is not the same as the existing member, and if it + * is, delete it. + */ + ListCell *lc; + foreach (lc, ec->ec_members) + { + if (lc == cell) + continue; + + if (equal(((EquivalenceMember *) lfirst(lc))->em_expr, + em->em_expr)) + { + ec->ec_members = list_delete_cell(ec->ec_members, cell, prev); + cell = prev; + break; + } + } + + if (lc == NULL) + { + /* + * We get to keep this EquivalenceMember. Correct its relids. + * nullable_relids should be empty, because self join removal + * only works for inner joins. + */ + Assert(em->em_nullable_relids == NULL); + change_relid(&em->em_relids, toRemove, toKeep); + } + } + } +} + +/* + * Remove EC sources referencing given relation. + */ +static void +filter_ec_sources(List **sources, Index relToRemove) +{ + ListCell *prev = NULL; + ListCell *cell = NULL; + ListCell *next = list_head(*sources); + + while (next) + { + RestrictInfo *rinfo; + + prev = cell; + cell = next; + next = lnext(next); + + rinfo = castNode(RestrictInfo, lfirst(cell)); + + if (bms_is_member(relToRemove, rinfo->required_relids)) + { + *sources = list_delete_cell(*sources, cell, prev); + cell = prev; + } + } +} + +/* + * Scratch space for the unique self join removal code. + */ +typedef struct +{ + PlannerInfo *root; + + /* Temporary array for relation ids. */ + Index *relids; + + /* + * Array of Relids, one for each relation, indexed by relation id. + * Each element is a set of relation ids with which this relation + * has a special join. + */ + Relids *special_join_rels; + + /* Array of row marks indexed by relid. */ + PlanRowMark **row_marks; + + /* Bitmapset for join relids that is used to avoid reallocation. */ + Relids joinrelids; + + /* + * Top-level targetlist of the query. We have to update any references + * it has to the relations we remove. + */ + List *targetlist; +} UsjScratch; + +/* + * Remove a relation after we have proven that it participates only in an + * unneeded unique self join. + * + * The joinclauses list is destructively changed. + */ +static void +remove_self_join_rel(UsjScratch *scratch, Relids joinrelids, List *joinclauses, + RelOptInfo *toKeep, RelOptInfo *toRemove) +{ + PlannerInfo *root = scratch->root; + ListCell *prev, *cell, *next; + List *toAppend; + int i; + + /* + * Transfer join and restriction clauses from the removed relation to the + * remaining one. We change the Vars of the clause to point to the remaining + * relation instead of the removed one. The clauses that require a subset of + * joinrelids become restriction clauses of the remaining relation, and + * others remain join clauses. We append them to baserestrictinfo and + * joininfo respectively, trying not to introduce duplicates. + * + * We also have to process the 'joinclauses' list here, because it contains + * EC-derived join clauses which must become filter clauses. It is not enough + * to just correct the ECs, because the EC-derived restrictions are generated + * before join removal (see generate_base_implied_equalities). + */ + toAppend = list_concat(joinclauses, toRemove->baserestrictinfo); + toAppend = list_concat(toAppend, toRemove->joininfo); + + foreach(cell, toAppend) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell); + bool is_join_clause = !bms_is_subset(rinfo->required_relids, joinrelids); + List **target = is_join_clause ? &toKeep->joininfo : &toKeep->baserestrictinfo; + + /* We can't have an EC-derived clause that joins to some third relation */ + Assert( !(is_join_clause && rinfo->parent_ec != NULL) ); + + /* + * Do not add multiple clauses derived from the same equivalence class. + */ + if (is_redundant_derived_clause(rinfo, *target)) + continue; + + /* + * Replace the references to the removed relation with references to + * the remaining one. + */ + change_varno(rinfo->clause, toRemove->relid, toKeep->relid); + + /* + * After we have replaced the Vars, check that the resulting clause is + * not implied by the existing ones. + */ + if (!contain_mutable_functions((Node *) rinfo->clause) + && predicate_implied_by(list_make1(rinfo->clause), + *target, false /*weak*/ )) + continue; /* provably implied by r1 */ + + /* + * If the clause has the form of "X=X", replace it with null test. + */ + if (rinfo->mergeopfamilies) + { + Expr *leftOp = (Expr *) get_leftop(rinfo->clause); + Expr *rightOp = (Expr *) get_rightop(rinfo->clause); + + if (leftOp != NULL && equal(leftOp, rightOp)) + { + NullTest *test = makeNode(NullTest); + test->arg = leftOp; + test->nulltesttype = IS_NOT_NULL; + test->argisrow = false; + test->location = -1; + rinfo->clause = (Expr *) test; + } + } + + /* + * Finally, correct the relids of the rinfo, replace the clause with + * the one we just constructed, and append it to the remaining relation. + */ + change_relid(&rinfo->required_relids, toRemove->relid, toKeep->relid); + change_relid(&rinfo->left_relids, toRemove->relid, toKeep->relid); + change_relid(&rinfo->right_relids, toRemove->relid, toKeep->relid); + change_relid(&rinfo->clause_relids, toRemove->relid, toKeep->relid); + + *target = lappend(*target, rinfo); + } + + /* + * Transfer the targetlist and attr_needed flags. + */ + Assert(toRemove->reltarget->sortgrouprefs == 0); + + foreach (cell, toRemove->reltarget->exprs) + { + Expr *node = lfirst(cell); + change_varno(node, toRemove->relid, toKeep->relid); + if (!list_member(toKeep->reltarget->exprs, node)) + toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, + node); + } + + for (i = toKeep->min_attr; i <= toKeep->max_attr; i++) + { + int attno = i - toKeep->min_attr; + toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno], + toRemove->attr_needed[attno]); + } + + /* + * If the removed relation has a row mark, transfer it to the remaining one. + * + * If both rels have row marks, just keep the one corresponding to the + * remaining relation, because we verified earlier that they have the same + * strength. + * + * Also make sure that the scratch->row_marks cache is up to date, because + * we are going to use it for further join removals. + */ + if (scratch->row_marks[toRemove->relid]) + { + PlanRowMark **markToRemove = &scratch->row_marks[toRemove->relid]; + PlanRowMark **markToKeep = &scratch->row_marks[toKeep->relid]; + if (*markToKeep) + { + Assert((*markToKeep)->markType == (*markToRemove)->markType); + + root->rowMarks = list_delete_ptr(root->rowMarks, *markToKeep); + *markToKeep = NULL; + } + else + { + *markToKeep = *markToRemove; + *markToRemove = NULL; + + /* Shouldn't have inheritance children here. */ + Assert((*markToKeep)->rti == (*markToKeep)->prti); + + (*markToKeep)->rti = toKeep->relid; + (*markToKeep)->prti = toKeep->relid; + } + } + + /* + * Likewise remove references from SpecialJoinInfo data structures. + * + * This is relevant in case the outer join we're deleting is nested inside + * other outer joins: the upper joins' relid sets have to be adjusted. The + * RHS of the target outer join will be made empty here, but that's OK + * since caller will delete that SpecialJoinInfo entirely. + */ + foreach (cell, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(cell); + + sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, toRemove->relid); + sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, toRemove->relid); + sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, toRemove->relid); + sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, toRemove->relid); + } + + + // !!!FIXME what about placeholders and upper-level tlists (e.g. for grouping)? + // The placeholders apparently work somehow due to the fact that they reference + // the same Var objects that we modify to point to the other relation. + + /* + * We must move the equivalence members that reference the removed relation + * to the remaining one, being careful not to introduce duplicate members. + * If a EC contains one member and is not used for sorting, it can be removed + * altogether. + */ + prev = NULL; + cell = NULL; + next = list_head(root->eq_classes); + while (next) + { + EquivalenceClass *ec; + + prev = cell; + cell = next; + next = lnext(next); + + ec = lfirst(cell); + + if (!bms_is_member(toRemove->relid, ec->ec_relids)) + { + /* + * This EC doesn't reference the removed relation, + * nothing to be done for it. + */ + continue; + } + + /* + * Update the EC to reference the remaining relation instead + * of the removed one. + */ + move_ec_members(ec, toRemove->relid, toKeep->relid); + change_relid(&ec->ec_relids, toRemove->relid, toKeep->relid); + + if (ec->ec_sortref == 0 && list_length(ec->ec_members) <= 1) + { + /* + * This EC is not used for sorting and contains one member. + * It won't generate any join clauses and can be removed. + */ + root->eq_classes = list_delete_cell(root->eq_classes, cell, prev); + cell = prev; + } + else + { + /* + * The updated EC should be kept. + * + * Some of its source and derived RestrictInfos point to the removed + * relation. There is no straightforward way to determine whether + * such a RestrictInfo should be removed or switched to the + * remaining relation. We remove all of them, and will generate + * the correct ones from equivalence members on demand. + * + * This is not important for the ECs that are only used for sorting, + * but we process them too for the sake of consistency. + */ + filter_ec_sources(&ec->ec_sources, toRemove->relid); + filter_ec_sources(&ec->ec_derives, toRemove->relid); + ec->ec_relids = bms_del_member(ec->ec_relids, toRemove->relid); + } + } + + /* + * Mark the rel as "dead" to show it is no longer part of the join tree. + * (Removing it from the baserel array altogether seems too risky.) + */ + toRemove->reloptkind = RELOPT_DEADREL; + + /* + * There may be references to the removed rel in other baserels' attr_needed + * arrays. Switch them to point to the remaining rel. + */ + for (i = 1; i < root->simple_rel_array_size; i++) + { + RelOptInfo *otherrel = root->simple_rel_array[i]; + int attroff; + + /* no point in processing target rel itself */ + if (i == toRemove->relid) + continue; + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (otherrel == NULL) + continue; + + Assert(otherrel->relid == i); /* sanity check on array */ + + for (attroff = otherrel->max_attr - otherrel->min_attr; + attroff >= 0; + attroff--) + { + change_relid(&otherrel->attr_needed[attroff], toRemove->relid, + toKeep->relid); + } + } +} + +/* + * Test whether the relations are joined on the same unique column. + */ +static bool +is_unique_self_join(PlannerInfo *root, Relids joinrelids, RelOptInfo *outer, + RelOptInfo *inner, List *restrictlist) +{ + UniqueIndexInfo *outeridx = NULL; + UniqueIndexInfo *inneridx = NULL; + ListCell *outerCell, *innerCell; + + innerrel_is_unique(root, joinrelids, inner->relids, + outer, JOIN_INNER, restrictlist, true, &outeridx); + if (!outeridx) + return false; + + innerrel_is_unique(root, joinrelids, outer->relids, + inner, JOIN_INNER, restrictlist, true, &inneridx); + if (!inneridx) + return false; + + /* We must have the same unique index for both relations. */ + if (outeridx->index->indexoid != inneridx->index->indexoid) + return false; + + /* + * The index clauses must also be the same. The varnos are different, so + * make a copy and replace all varnos of one relation with another, so + * that we can compare them with equal(). + */ + forboth(innerCell, inneridx->clauses, outerCell, outeridx->clauses) + { + Expr *innerExpr = copyObject(castNode(RestrictInfo, lfirst(innerCell))->clause); + Expr *outerExpr = copyObject(castNode(RestrictInfo, lfirst(outerCell))->clause); + change_varno(outerExpr, outer->relid, inner->relid); + change_varno(innerExpr, outer->relid, inner->relid); + if (!equal(outerExpr, innerExpr)) + { + pfree(outerExpr); + pfree(innerExpr); + return false; + } + pfree(outerExpr); + pfree(innerExpr); + } + + return true; +} + +/* + * Find and remove unique self joins in a group of base relations that have + * the same Oid. + * + * Returns IntList of the relids that were removed. + */ +static List * +remove_self_joins_one_group(UsjScratch *scratch, Index *relids, int n) +{ + PlannerInfo *root = scratch->root; + Relids joinrelids = scratch->joinrelids; + List *result = NIL; + int i, o; + ListCell *lc; + + if (n < 2) + return NIL; + + for (o = 0; o < n; o++) + { + RelOptInfo *outer = root->simple_rel_array[relids[o]]; + + for (i = o + 1; i < n; i++) + { + RelOptInfo *inner = root->simple_rel_array[relids[i]]; + List *restrictlist; + + /* A sanity check: the relations have the same Oid. */ + Assert(root->simple_rte_array[relids[i]]->relid + == root->simple_rte_array[relids[o]]->relid); + + /* + * This optimization applies to inner joins only, so skip any relations + * that form a special join. + */ + if (bms_is_member(relids[i], scratch->special_join_rels[relids[o]])) + continue; + + /* Reuse joinrelids bitset to avoid reallocation. */ + joinrelids = bms_del_members(joinrelids, joinrelids); + + /* + * We only deal with base rels here, so their relids bitset + * contains only one member -- their relid. + */ + joinrelids = bms_add_member(joinrelids, relids[o]); + joinrelids = bms_add_member(joinrelids, relids[i]); + + /* Is it a unique self join? */ + restrictlist = build_joinrel_restrictlist(root, joinrelids, outer, + inner); + if (!is_unique_self_join(root, joinrelids, outer, inner, + restrictlist)) + continue; + + /* + * We can't remove the join if the relations have row marks of + * different strength (e.g. one is locked FOR UPDATE and another + * just has ROW_MARK_REFERENCE for EvalPlanQual rechecking). + */ + if (scratch->row_marks[relids[i]] && scratch->row_marks[relids[o]] + && scratch->row_marks[relids[i]]->markType + != scratch->row_marks[relids[o]]->markType) + { + continue; + } + + /* + * We can remove either relation, so remove the outer one, + * to simplify this loop. + */ + remove_self_join_rel(scratch, joinrelids, restrictlist, inner, outer); + result = lappend_int(result, relids[o]); + + /* + * Replace varno in root targetlist. + */ + foreach(lc, scratch->targetlist) + change_varno(lfirst(lc), relids[o], relids[i]); + + /* We removed the outer relation, try the next one. */ + break; + } + } + + scratch->joinrelids = joinrelids; + return result; +} + +/* + * A qsort comparator to sort the relids by the relation Oid. + */ +static int +compare_rte(const Index *left, const Index *right, PlannerInfo *root) +{ + Oid l = root->simple_rte_array[*left]->relid; + Oid r = root->simple_rte_array[*right]->relid; + return l < r ? 1 : ( l == r ? 0 : -1 ); +} + +/* + * Find and remove unique self joins on a particular level of the join tree. + * + * We sort the relations by Oid and then examine each group with the same Oid. + * If we removed any relation, remove it from joinlist as well. + */ +static void +remove_self_joins_one_level(UsjScratch *scratch, List **joinlist) +{ + ListCell *lc; + List *relidsToRemove = NIL; + Oid groupOid; + int groupStart; + int i; + int n = 0; + Index *relid_ascending = scratch->relids; + PlannerInfo *root = scratch->root; + + /* + * Collect the ids of base relations at this level of the join tree. + */ + foreach (lc, *joinlist) + { + RangeTblEntry *rte; + RelOptInfo *rel; + RangeTblRef *ref = (RangeTblRef *) lfirst(lc); + if (!IsA(ref, RangeTblRef)) + continue; + + rte = root->simple_rte_array[ref->rtindex]; + rel = root->simple_rel_array[ref->rtindex]; + + /* We only care about base relations from which we select something. */ + if (rte->rtekind != RTE_RELATION || rte->relkind != RELKIND_RELATION + || rel == NULL) + { + continue; + } + + /* This optimization won't work for tables that have inheritance children. */ + if (rte->inh) + continue; + + relid_ascending[n++] = ref->rtindex; + + /* Limit the number of joins we process to control the quadratic behavior. */ + if (n > join_collapse_limit) + break; + } + + if (n < 2) + return; + + /* + * Find and process the groups of relations that have same Oid. + */ + qsort_arg(relid_ascending, n, sizeof(*relid_ascending), + (qsort_arg_comparator) compare_rte, root); + groupOid = root->simple_rte_array[relid_ascending[0]]->relid; + groupStart = 0; + for (i = 1; i < n; i++) + { + RangeTblEntry *rte = root->simple_rte_array[relid_ascending[i]]; + Assert(rte->relid != InvalidOid); + if (rte->relid != groupOid) + { + relidsToRemove = list_concat(relidsToRemove, + remove_self_joins_one_group(scratch, &relid_ascending[groupStart], + i - groupStart)); + groupOid = rte->relid; + groupStart = i; + } + } + Assert(groupOid != InvalidOid); + Assert(groupStart < n); + relidsToRemove = list_concat(relidsToRemove, + remove_self_joins_one_group(scratch, &relid_ascending[groupStart], + n - groupStart)); + + /* + * Delete the removed relations from joinlist. + */ + foreach(lc, relidsToRemove) + { + Index indexToRemove = lfirst_int(lc); + ListCell *prev = NULL, *next = NULL; + ListCell *lc2 = list_head(*joinlist); + while (lc2) + { + next = lnext(lc2); + if (castNode(RangeTblRef, lfirst(lc2))->rtindex == indexToRemove) + *joinlist = list_delete_cell(*joinlist, lc2, prev); + else + prev = lc2; + lc2 = next; + } + } + + return; +} + +/* + * Find and remove unique self joins on a single level of a join tree, and + * recurse to handle deeper levels. + */ +static void +remove_self_joins_recurse(UsjScratch *scratch, List **joinlist) +{ + ListCell *lc; + foreach (lc, *joinlist) + { + switch (((Node*) lfirst(lc))->type) + { + case T_List: + remove_self_joins_recurse(scratch, (List **) &lfirst(lc)); + break; + case T_RangeTblRef: + break; + default: + Assert(false); + } + } + remove_self_joins_one_level(scratch, joinlist); +} + +/* + * Find and remove unique self joins in the entire join tree. + * + * We try to find joins where the same physical relation is joined to + * itself on all the columns of a unique index. Such joins can be + * can be replaced with a scan using the combined filters. + * + * When we wind such a join, we mark one of the participating relation as + * dead, and rewrite all references to it to point to the remaining + * relation. + * + * 'targetlist' is the top-level targetlist of query. We fix any references + * it has to the relations we remove. + */ +void +remove_useless_self_joins(PlannerInfo *root, List **joinlist, List *targetlist) +{ + ListCell *lc; + UsjScratch scratch; + + scratch.root = root; + scratch.relids = palloc(root->simple_rel_array_size * sizeof(Index)); + scratch.special_join_rels = palloc0(root->simple_rel_array_size * sizeof(Relids)); + scratch.row_marks = palloc0(root->simple_rel_array_size * sizeof(PlanRowMark *)); + scratch.joinrelids = NULL; + scratch.targetlist = targetlist; + + /* Find out which relations have special joins to which. */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc); + int bit = -1; + while ((bit = bms_next_member(info->min_lefthand, bit)) >= 0) + { + RelOptInfo *rel = find_base_rel(root, bit); + scratch.special_join_rels[rel->relid] = + bms_add_members(scratch.special_join_rels[rel->relid], + info->min_righthand); + } + + bit = -1; + while ((bit = bms_next_member(info->min_righthand, bit)) >= 0) + { + RelOptInfo *rel = find_base_rel(root, bit); + scratch.special_join_rels[rel->relid] = + bms_add_members(scratch.special_join_rels[rel->relid], + info->min_lefthand); + } + } + + /* Collect row marks. */ + foreach (lc, root->rowMarks) + { + PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc); + + /* Can't have more than one row mark for a relation. */ + Assert(scratch.row_marks[rowMark->rti] == NULL); + + scratch.row_marks[rowMark->rti] = rowMark; + } + + /* Finally, remove the joins. */ + remove_self_joins_recurse(&scratch, joinlist); } diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index b05adc7..49c5f61 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -196,7 +196,7 @@ query_planner(PlannerInfo *root, List *tlist, * jointree preprocessing, but the necessary information isn't available * until we've built baserel data structures and classified qual clauses. */ - joinlist = remove_useless_joins(root, joinlist); + joinlist = remove_useless_left_joins(root, joinlist); /* * Also, reduce any semijoins with unique inner rels to plain inner joins. @@ -205,6 +205,11 @@ query_planner(PlannerInfo *root, List *tlist, reduce_unique_semijoins(root); /* + * Remove self joins on a unique column. + */ + remove_useless_self_joins(root, &joinlist, tlist); + + /* * Now distribute "placeholders" to base rels as needed. This has to be * done after join removal because removal could change whether a * placeholder is evaluable at a base rel. diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 83008d7..0527420 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -112,15 +112,14 @@ assign_param_for_var(PlannerInfo *root, Var *var) /* * This comparison must match _equalVar(), except for ignoring - * varlevelsup. Note that _equalVar() ignores the location. + * varlevelsup. Note that _equalVar() ignores the location and + * old varno/varattno. */ if (pvar->varno == var->varno && pvar->varattno == var->varattno && pvar->vartype == var->vartype && pvar->vartypmod == var->vartypmod && - pvar->varcollid == var->varcollid && - pvar->varnoold == var->varnoold && - pvar->varoattno == var->varoattno) + pvar->varcollid == var->varcollid) return pitem->paramId; } } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index d50d86b..3f73b38 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1585,7 +1585,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree && relation_has_unique_index_for(root, rel, NIL, sjinfo->semi_rhs_exprs, - sjinfo->semi_operators)) + sjinfo->semi_operators, + NULL /*index_info*/)) { pathnode->umethod = UNIQUE_PATH_NOOP; pathnode->path.rows = rel->rows; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 39f5729..1b21a6e 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -38,14 +38,10 @@ typedef struct JoinHashEntry static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *input_rel); -static List *build_joinrel_restrictlist(PlannerInfo *root, - RelOptInfo *joinrel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel); static void build_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); -static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel, +static List *subbuild_joinrel_restrictlist(Relids joinrelids, List *joininfo_list, List *new_restrictlist); static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel, @@ -548,7 +544,7 @@ build_join_rel(PlannerInfo *root, */ if (restrictlist_ptr) *restrictlist_ptr = build_joinrel_restrictlist(root, - joinrel, + joinrel->relids, outer_rel, inner_rel); return joinrel; @@ -651,7 +647,7 @@ build_join_rel(PlannerInfo *root, * caller might or might not need the restrictlist, but I need it anyway * for set_joinrel_size_estimates().) */ - restrictlist = build_joinrel_restrictlist(root, joinrel, + restrictlist = build_joinrel_restrictlist(root, joinrel->relids, outer_rel, inner_rel); if (restrictlist_ptr) *restrictlist_ptr = restrictlist; @@ -973,7 +969,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * the various joinlist entries ultimately refer to RestrictInfos * pushed into them by distribute_restrictinfo_to_rels(). * - * 'joinrel' is a join relation node + * 'joinrelids' is a join relation id set * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined * to form joinrel. * @@ -986,9 +982,9 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * RestrictInfo nodes are no longer context-dependent. Instead, just include * the original nodes in the lists made for the join relation. */ -static List * +List * build_joinrel_restrictlist(PlannerInfo *root, - RelOptInfo *joinrel, + Relids joinrelids, RelOptInfo *outer_rel, RelOptInfo *inner_rel) { @@ -999,8 +995,8 @@ build_joinrel_restrictlist(PlannerInfo *root, * eliminating any duplicates (important since we will see many of the * same clauses arriving from both input relations). */ - result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL); - result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result); + result = subbuild_joinrel_restrictlist(joinrelids, outer_rel->joininfo, NIL); + result = subbuild_joinrel_restrictlist(joinrelids, inner_rel->joininfo, result); /* * Add on any clauses derived from EquivalenceClasses. These cannot be @@ -1009,7 +1005,7 @@ build_joinrel_restrictlist(PlannerInfo *root, */ result = list_concat(result, generate_join_implied_equalities(root, - joinrel->relids, + joinrelids, outer_rel->relids, inner_rel)); @@ -1035,7 +1031,7 @@ build_joinrel_joinlist(RelOptInfo *joinrel, } static List * -subbuild_joinrel_restrictlist(RelOptInfo *joinrel, +subbuild_joinrel_restrictlist(Relids joinrelids, List *joininfo_list, List *new_restrictlist) { @@ -1045,7 +1041,7 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel, { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - if (bms_is_subset(rinfo->required_relids, joinrel->relids)) + if (bms_is_subset(rinfo->required_relids, joinrelids)) { /* * This clause becomes a restriction clause for the joinrel, since diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 88d3723..d2dc20c 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -514,8 +514,10 @@ typedef struct PartitionSchemeData *PartitionScheme; * populate these fields, for base rels; but someday they might be used for * join rels too: * - * unique_for_rels - list of Relid sets, each one being a set of other - * rels for which this one has been proven unique + * unique_for_rels - list of (Relids, UniqueIndexInfo*) lists, where Relids + * is a set of other rels for which this one has been proven + * unique, and UniqueIndexInfo* stores information about the + * index that makes it unique, if any. * non_unique_for_rels - list of Relid sets, each one being a set of * other rels for which we have tried and failed to prove * this one unique diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 81abcf5..6814660 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -271,6 +271,11 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List **restrictlist_ptr); + +extern List *build_joinrel_restrictlist(PlannerInfo *root, + Relids joinrelids, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); extern Relids min_join_parameterization(PlannerInfo *root, Relids joinrelids, RelOptInfo *outer_rel, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index cafde30..ad4a2ad 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -71,9 +71,20 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); * routines to generate index paths */ extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel); +/* + * UniqueIndexInfo describes a unique index and its corresponding clauses + * that guarantee the uniqueness of a relation. + */ +typedef struct UniqueIndexInfo +{ + IndexOptInfo *index; + List *clauses; +} UniqueIndexInfo; + extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, - List *exprlist, List *oprlist); + List *exprlist, List *oprlist, + UniqueIndexInfo **info); extern bool indexcol_is_bool_constant_for_query(IndexOptInfo *index, int indexcol); extern bool match_index_to_operand(Node *operand, int indexcol, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index c8ab028..b9b00cc 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -14,6 +14,7 @@ #ifndef PLANMAIN_H #define PLANMAIN_H +#include "optimizer/paths.h" #include "nodes/plannodes.h" #include "nodes/relation.h" @@ -103,13 +104,18 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root); /* * prototypes for plan/analyzejoins.c */ -extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); +extern List *remove_useless_left_joins(PlannerInfo *root, List *joinlist); extern void reduce_unique_semijoins(PlannerInfo *root); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); + extern bool innerrel_is_unique(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, - JoinType jointype, List *restrictlist, bool force_cache); + JoinType jointype, List *restrictlist, bool force_cache, + UniqueIndexInfo **index_info); + +extern void remove_useless_self_joins(PlannerInfo *root, List **jointree, + List *tlist); /* * prototypes for plan/setrefs.c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 1f53780..86a790f 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4321,11 +4321,13 @@ explain (costs off) select p.* from (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k where p.k = 1 and p.k = 2; - QUERY PLAN --------------------------- + QUERY PLAN +------------------------------------------------ Result One-Time Filter: false -(2 rows) + -> Index Scan using parent_pkey on parent x + Index Cond: (k = 1) +(4 rows) -- bug 5255: this is not optimizable by join removal begin; @@ -4442,6 +4444,113 @@ select * from (0 rows) -- +-- test that semi- or inner self-joins on a unique column are removed +-- +create table sj (a int unique, b int); +insert into sj values (1, null), (null, 2), (2, 1); +analyze sj; +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + a | b +---+--- + 2 | 1 +(1 row) + +explain (costs off) +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + QUERY PLAN +----------------------------------------------- + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b = (a - 1))) +(2 rows) + +explain (costs off) +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + QUERY PLAN +------------------------------------------ + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b < 10)) +(2 rows) + +-- subselect that references the removed relation +explain (costs off) +select t1.a, (select a from sj where a = t2.a and a = t1.a) +from sj t1, sj t2 +where t1.a = t2.a; + QUERY PLAN +------------------------------------------ + Seq Scan on sj t2 + Filter: (a IS NOT NULL) + SubPlan 1 + -> Result + One-Time Filter: (t2.a = t2.a) + -> Seq Scan on sj + Filter: (a = t2.a) +(7 rows) + +-- equality to different constants +create unique index sj_ab on sj(a, b); +explain (costs off) +select * from sj t1, sj t2 +where t1.a = t2.a and t1.b = 1 and t2.b = 2; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (t1.a = t2.a) + -> Seq Scan on sj t1 + Filter: (b = 1) + -> Seq Scan on sj t2 + Filter: (b = 2) +(6 rows) + +-- Check that attr_needed is updated correctly after self-join removal. In +-- this test, k1.b is required at either j1 or j2. If this info is lost, +-- join targetlist for (k1, k2) will not contain k1.b. Use index scan for +-- k1 so that we don't get 'b' from physical tlist used for seqscan. Also +-- disable reordering of joins because this test depends on a particular +-- join tree. +create table sk (a int, b int); +create index sk_a_idx on sk(a); +set join_collapse_limit to 1; +set enable_seqscan to off; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b; + QUERY PLAN +----------------------------------------------------------- + Hash Join + Hash Cond: (k1.b = j2.b) + -> Merge Join + Merge Cond: (k1.a = k2.a) + -> Index Scan using sk_a_idx on sk k1 + -> Materialize + -> Index Only Scan using sk_a_idx on sk k2 + -> Hash + -> Index Only Scan using sj_ab on sj j2 + Index Cond: (a IS NOT NULL) +(10 rows) + +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b; + QUERY PLAN +----------------------------------------------------------- + Hash Join + Hash Cond: (k1.b = j2.b) + -> Merge Join + Merge Cond: (k1.a = k2.a) + -> Index Scan using sk_a_idx on sk k1 + -> Materialize + -> Index Only Scan using sk_a_idx on sk k2 + -> Hash + -> Index Only Scan using sj_ab on sj j2 + Index Cond: (a IS NOT NULL) +(10 rows) + +reset join_collapse_limit; +reset enable_seqscan; +-- -- Test hints given on incorrect column references are useful -- select t1.uunique1 from @@ -5885,6 +5994,8 @@ left join j2 on j1.id1 = j2.id1 where j1.id2 = 1; Output: j2.id1, j2.id2 (8 rows) +-- !!!FIXME this test doesn't break if I set skip_mark_restore to true. +-- Also should avoid unique self join removal by using two different relations. -- validate logic in merge joins which skips mark and restore. -- it should only do this if all quals which were used to detect the unique -- are present as join quals, and not plain quals. @@ -5896,14 +6007,11 @@ create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1; explain (costs off) select * from j1 j1 inner join j1 j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1; - QUERY PLAN --------------------------------------------- - Merge Join - Merge Cond: (j1.id1 = j2.id1) - Join Filter: (j1.id2 = j2.id2) - -> Index Scan using j1_id1_idx on j1 - -> Index Scan using j1_id1_idx on j1 j2 -(5 rows) + QUERY PLAN +---------------------------------------------------------------------------- + Seq Scan on j1 j2 + Filter: ((id1 IS NOT NULL) AND (id2 IS NOT NULL) AND ((id1 % 1000) = 1)) +(2 rows) select * from j1 j1 inner join j1 j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2 diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 334a4dc..9772e49 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1535,6 +1535,56 @@ select * from int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok -- +-- test that semi- or inner self-joins on a unique column are removed +-- + +create table sj (a int unique, b int); +insert into sj values (1, null), (null, 2), (2, 1); +analyze sj; + +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + +explain (costs off) +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + +explain (costs off) +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + +-- subselect that references the removed relation +explain (costs off) +select t1.a, (select a from sj where a = t2.a and a = t1.a) +from sj t1, sj t2 +where t1.a = t2.a; + +-- equality to different constants +create unique index sj_ab on sj(a, b); + +explain (costs off) +select * from sj t1, sj t2 +where t1.a = t2.a and t1.b = 1 and t2.b = 2; + +-- Check that attr_needed is updated correctly after self-join removal. In +-- this test, k1.b is required at either j1 or j2. If this info is lost, +-- join targetlist for (k1, k2) will not contain k1.b. Use index scan for +-- k1 so that we don't get 'b' from physical tlist used for seqscan. Also +-- disable reordering of joins because this test depends on a particular +-- join tree. +create table sk (a int, b int); +create index sk_a_idx on sk(a); +set join_collapse_limit to 1; +set enable_seqscan to off; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b; +reset join_collapse_limit; +reset enable_seqscan; + +-- -- Test hints given on incorrect column references are useful -- @@ -1975,6 +2025,9 @@ explain (verbose, costs off) select * from j1 left join j2 on j1.id1 = j2.id1 where j1.id2 = 1; +-- !!!FIXME this test doesn't break if I set skip_mark_restore to true. +-- Also should avoid unique self join removal by using two different relations. + -- validate logic in merge joins which skips mark and restore. -- it should only do this if all quals which were used to detect the unique -- are present as join quals, and not plain quals.