David Rowley <david.row...@2ndquadrant.com> writes: > Attaching the original patch again so the commitfest bot gets off my > back about the other one not applying.
Pushed that one. I'm interested by the "POC" patch, but I agree that it'd take some research to show that it isn't a net negative for simple queries. It sounds like you're not really interested in pursuing that right now? Anyway, I rebased the POC patch up to HEAD, just in case anyone still wants to play with it. regards, tom lane
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 65302fe..2ffa00c 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2351,6 +2351,7 @@ _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) WRITE_NODE_FIELD(ec_sources); WRITE_NODE_FIELD(ec_derives); WRITE_BITMAPSET_FIELD(ec_relids); + WRITE_BITMAPSET_FIELD(ec_allrelids); WRITE_BOOL_FIELD(ec_has_const); WRITE_BOOL_FIELD(ec_has_volatile); WRITE_BOOL_FIELD(ec_below_outer_join); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 0debac7..c5888b8 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -216,6 +216,8 @@ make_one_rel(PlannerInfo *root, List *joinlist) } root->total_table_pages = total_pages; + root->eq_index = create_eclass_index(root, EQUIVALANCE_IDX_MULTI_MEMBER); + /* * Generate access paths for each base rel. */ @@ -226,6 +228,9 @@ make_one_rel(PlannerInfo *root, List *joinlist) */ rel = make_rel_from_joinlist(root, joinlist); + free_eclass_index(root->eq_index); + root->eq_index = NULL; + /* * The result should join all and only the query's base rels. */ diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 61b5b11..570b42f 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -358,6 +358,7 @@ process_equivalence(PlannerInfo *root, ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources); ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives); ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids); + ec1->ec_allrelids = bms_join(ec1->ec_allrelids, ec2->ec_allrelids); ec1->ec_has_const |= ec2->ec_has_const; /* can't need to set has_volatile */ ec1->ec_below_outer_join |= ec2->ec_below_outer_join; @@ -372,6 +373,7 @@ process_equivalence(PlannerInfo *root, ec2->ec_sources = NIL; ec2->ec_derives = NIL; ec2->ec_relids = NULL; + ec2->ec_allrelids = NULL; ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; ec1->ec_min_security = Min(ec1->ec_min_security, @@ -432,6 +434,7 @@ process_equivalence(PlannerInfo *root, ec->ec_sources = list_make1(restrictinfo); ec->ec_derives = NIL; ec->ec_relids = NULL; + ec->ec_allrelids = NULL; ec->ec_has_const = false; ec->ec_has_volatile = false; ec->ec_below_outer_join = below_outer_join; @@ -572,6 +575,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, { ec->ec_relids = bms_add_members(ec->ec_relids, relids); } + ec->ec_allrelids = bms_add_members(ec->ec_allrelids, relids); ec->ec_members = lappend(ec->ec_members, em); return em; @@ -708,6 +712,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, newec->ec_sources = NIL; newec->ec_derives = NIL; newec->ec_relids = NULL; + newec->ec_allrelids = NULL; newec->ec_has_const = false; newec->ec_has_volatile = contain_volatile_functions((Node *) expr); newec->ec_below_outer_join = false; @@ -1114,6 +1119,60 @@ generate_join_implied_equalities_for_ecs(PlannerInfo *root, nominal_join_relids = join_relids; } + if (root->eq_index != NULL) + { + Bitmapset *inner_ecs = NULL; + Bitmapset *join_ecs = NULL; + Bitmapset *matching_ecs; + int i; + + Assert((root->eq_index->ei_flags & EQUIVALANCE_IDX_MULTI_MEMBER)); + + i = -1; + while ((i = bms_next_member(inner_relids, i)) > 0) + inner_ecs = bms_add_members(inner_ecs, root->eq_index->ei_index[i]); + + i = -1; + while ((i = bms_next_member(join_relids, i)) > 0) + join_ecs = bms_add_members(join_ecs, root->eq_index->ei_index[i]); + + /* Determine all eclasses in common between inner rels and join rels */ + matching_ecs = bms_int_members(inner_ecs, join_ecs); + + i = -1; + while ((i = bms_next_member(matching_ecs, i)) >= 0) + { + EquivalenceClass *ec = root->eq_index->ei_classes[i]; + List *sublist = NIL; + + /* ECs containing consts do not need any further enforcement */ + if (ec->ec_has_const) + continue; + + /* Ensure the class contains members for any of nominal_join_relids */ + Assert(bms_overlap(ec->ec_relids, nominal_join_relids)); + + if (!ec->ec_broken) + sublist = generate_join_implied_equalities_normal(root, + ec, + join_relids, + outer_relids, + inner_relids); + + /* Recover if we failed to generate required derived clauses */ + if (ec->ec_broken) + sublist = generate_join_implied_equalities_broken(root, + ec, + nominal_join_relids, + outer_relids, + nominal_inner_relids, + inner_rel); + + result = list_concat(result, sublist); + } + return result; + } + foreach(lc, eclasses) { EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc); @@ -2026,6 +2085,8 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) * we ignore that fine point here.) This is much like exprs_known_equal, * except that we insist on the comparison operator matching the eclass, so * that the result is definite not approximate. + * + * An eq_index without volatile classes must already exist on 'root'. */ EquivalenceClass * match_eclasses_to_foreign_key_col(PlannerInfo *root, @@ -2038,31 +2099,35 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root, AttrNumber var2attno = fkinfo->confkey[colno]; Oid eqop = fkinfo->conpfeqop[colno]; List *opfamilies = NIL; /* compute only if needed */ - ListCell *lc1; + EquivalenceClassIndex *eq_index; + Bitmapset *matching_ecs; + int i; - foreach(lc1, root->eq_classes) + Assert(root->eq_index != NULL); + Assert((root->eq_index->ei_flags & EQUIVALANCE_IDX_NON_VOLATILE) != 0); + + eq_index = root->eq_index; + + /* Determine which eclasses contain members for both of the relations */ + matching_ecs = bms_intersect(eq_index->ei_index[var1varno], + eq_index->ei_index[var2varno]); + + i = -1; + while ((i = bms_next_member(matching_ecs, i)) >= 0) { - EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + EquivalenceClass *ec = eq_index->ei_classes[i]; bool item1member = false; bool item2member = false; - ListCell *lc2; + ListCell *lc; - /* Never match to a volatile EC */ - if (ec->ec_has_volatile) - continue; - /* Note: it seems okay to match to "broken" eclasses here */ + /* the eclass index shouldn't have indexed these */ + Assert(!ec->ec_has_volatile); - /* - * If eclass visibly doesn't have members for both rels, there's no - * need to grovel through the members. - */ - if (!bms_is_member(var1varno, ec->ec_relids) || - !bms_is_member(var2varno, ec->ec_relids)) - continue; + /* Note: it seems okay to match to "broken" eclasses here */ - foreach(lc2, ec->ec_members) + foreach(lc, ec->ec_members) { - EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc); Var *var; if (em->em_is_child) @@ -2238,6 +2303,106 @@ generate_implied_equalities_for_column(PlannerInfo *root, else parent_relids = NULL; /* not used, but keep compiler quiet */ + if (root->eq_index != NULL) + { + Bitmapset *matched_ecs = NULL; + int i; + + Assert((root->eq_index->ei_flags & EQUIVALANCE_IDX_MULTI_MEMBER)); + + i = -1; + while ((i = bms_next_member(rel->relids, i)) > 0) + matched_ecs = bms_add_members(matched_ecs, root->eq_index->ei_index[i]); + + i = -1; + while ((i = bms_next_member(matched_ecs, i)) >= 0) + { + EquivalenceClass *cur_ec = root->eq_index->ei_classes[i]; + EquivalenceMember *cur_em; + ListCell *lc2; + + /* Won't generate joinclauses if const */ + if (cur_ec->ec_has_const) + continue; + + /* + * Scan members, looking for a match to the target column. Note that + * child EC members are considered, but only when they belong to the + * target relation. (Unlike regular members, the same expression + * could be a child member of more than one EC. Therefore, it's + * potentially order-dependent which EC a child relation's target + * column gets matched to. This is annoying but it only happens in + * corner cases, so for now we live with just reporting the first + * match. See also get_eclass_for_sort_expr.) + */ + cur_em = NULL; + foreach(lc2, cur_ec->ec_members) + { + cur_em = (EquivalenceMember *) lfirst(lc2); + if (bms_equal(cur_em->em_relids, rel->relids) && + callback(root, rel, cur_ec, cur_em, callback_arg)) + break; + cur_em = NULL; + } + + if (!cur_em) + continue; + + /* + * Found our match. Scan the other EC members and attempt to generate + * joinclauses. + */ + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + RestrictInfo *rinfo; + + if (other_em->em_is_child) + continue; /* ignore children here */ + + /* Make sure it'll be a join to a different rel */ + if (other_em == cur_em || + bms_overlap(other_em->em_relids, rel->relids)) + continue; + + /* Forget it if caller doesn't want joins to this rel */ + if (bms_overlap(other_em->em_relids, prohibited_rels)) + continue; + + /* + * Also, if this is a child rel, avoid generating a useless join + * to its parent rel(s). + */ + if (is_child_rel && + bms_overlap(parent_relids, other_em->em_relids)) + continue; + + eq_op = select_equality_operator(cur_ec, + cur_em->em_datatype, + other_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; + + /* set parent_ec to mark as redundant with other joinclauses */ + rinfo = create_join_clause(root, cur_ec, eq_op, + cur_em, other_em, + cur_ec); + + result = lappend(result, rinfo); + } + + /* + * If somehow we failed to create any join clauses, we might as well + * keep scanning the ECs for another match. But if we did make any, + * we're done, because we don't want to return non-redundant clauses. + */ + if (result) + break; + } + return result; + } + foreach(lc1, root->eq_classes) { EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); @@ -2354,6 +2519,23 @@ have_relevant_eclass_joinclause(PlannerInfo *root, { ListCell *lc1; + if (root->eq_index != NULL) + { + Bitmapset *rel1_ecs = NULL; + Bitmapset *rel2_ecs = NULL; + + int i = -1; + while ((i = bms_next_member(rel1->relids, i)) > 0) + rel1_ecs = bms_add_members(rel1_ecs, root->eq_index->ei_index[i]); + + i = -1; + while ((i = bms_next_member(rel2->relids, i)) > 0) + rel2_ecs = bms_add_members(rel2_ecs, root->eq_index->ei_index[i]); + + /* return true if there are any eclasses in common between the two relations */ + return bms_overlap(rel1_ecs, rel2_ecs); + } + foreach(lc1, root->eq_classes) { EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); @@ -2407,6 +2589,28 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1) { ListCell *lc1; + if (root->eq_index != NULL) + { + Bitmapset *matched_ecs = NULL; + int i; + + Assert((root->eq_index->ei_flags & EQUIVALANCE_IDX_MULTI_MEMBER)); + + i = -1; + while ((i = bms_next_member(rel1->relids, i)) > 0) + matched_ecs = bms_add_members(matched_ecs, root->eq_index->ei_index[i]); + + i = -1; + + while ((i = bms_next_member(matched_ecs, i)) >= 0) + { + EquivalenceClass *ec = root->eq_index->ei_classes[i]; + if (!bms_is_subset(ec->ec_relids, rel1->relids)) + return true; + } + return false; + } + foreach(lc1, root->eq_classes) { EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); @@ -2556,3 +2760,76 @@ is_redundant_with_indexclauses(RestrictInfo *rinfo, List *indexclauses) return false; } + +EquivalenceClassIndex * +create_eclass_index(PlannerInfo *root, int flags) +{ + EquivalenceClassIndex *ec_index = makeNode(EquivalenceClassIndex); + EquivalenceClass **ei_classes; + Bitmapset **ei_index; + ListCell *lc; + int i; + + ec_index->ei_classes = ei_classes = palloc(sizeof(EquivalenceClass *) * + list_length(root->eq_classes)); + ec_index->ei_index = ei_index = palloc0(sizeof(Bitmapset *) * + root->simple_rel_array_size); + ec_index->ei_flags = flags; + + /* + * Index the eclass list so that we can quickly identify eclasses + * belonging to a given relation. First we build an array to store + * each eclass, this allows us to quickly lookup the eclass by array + * index. We then build an index using a Bitmapset for each relation to + * mark which eclass array element contains a member for that relation. + */ + i = 0; + foreach(lc, root->eq_classes) + ei_classes[i++] = lfirst(lc); + + /* + * We could build the indexes in the loop above, but looping backwards + * allows the Bitmapset to be allocated to its final size on the first + * allocation. Doing this forward could cause small incremental + * allocations which can be slower. + */ + for (i--; i >= 0; i--) + { + int relid = -1; + + while ((relid = bms_next_member(ei_classes[i]->ec_allrelids, relid)) > 0) + { + EquivalenceClass *ec = ei_classes[i]; + + /* Don't index classes with a const if flags mention not to. */ + if (ec->ec_has_const && (flags & EQUIVALANCE_IDX_NON_CONST) != 0) + continue; + + /* Skip volatile classes when not required in the index */ + if (ec->ec_has_volatile && + (flags & EQUIVALANCE_IDX_NON_VOLATILE) != 0) + continue; + + if (list_length(ec->ec_members) <= 1 && + (flags & EQUIVALANCE_IDX_MULTI_MEMBER) != 0) + continue; + + Assert(relid < root->simple_rel_array_size); + + /* mark this eclass as having a member for relid */ + ei_index[relid] = bms_add_member(ei_index[relid], i); + } + } + + return ec_index; +} + +void +free_eclass_index(EquivalenceClassIndex *eci) +{ + Assert(IsA(eci, EquivalenceClassIndex)); + + pfree(eci->ei_classes); + pfree(eci->ei_index); +} + diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 2afc3f1..a3bb646 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -2433,6 +2433,9 @@ match_foreign_keys_to_quals(PlannerInfo *root) List *newlist = NIL; ListCell *lc; + + root->eq_index = create_eclass_index(root, EQUIVALANCE_IDX_NON_VOLATILE); + foreach(lc, root->fkey_list) { ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); @@ -2578,6 +2581,10 @@ match_foreign_keys_to_quals(PlannerInfo *root) } /* Replace fkey_list, thereby discarding any useless entries */ root->fkey_list = newlist; + + /* clean up the eclass index */ + free_eclass_index(root->eq_index); + root->eq_index = NULL; } diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index f938925..736a1db 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -259,6 +259,7 @@ typedef enum NodeTag /* these aren't subclasses of Path: */ T_EquivalenceClass, T_EquivalenceMember, + T_EquivalenceClassIndex, T_PathKey, T_PathTarget, T_RestrictInfo, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index a008ae0..f50f98c 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -263,6 +263,8 @@ struct PlannerInfo List *eq_classes; /* list of active EquivalenceClasses */ + struct EquivalenceClassIndex *eq_index; /* Temporary eclass index */ + List *canon_pathkeys; /* list of "canonical" PathKeys */ List *left_join_clauses; /* list of RestrictInfos for mergejoinable @@ -923,6 +925,7 @@ typedef struct EquivalenceClass List *ec_derives; /* list of derived RestrictInfos */ Relids ec_relids; /* all relids appearing in ec_members, except * for child members (see below) */ + Relids ec_allrelids; /* all relids, including child members */ bool ec_has_const; /* any pseudoconstants in ec_members? */ bool ec_has_volatile; /* the (sole) member is a volatile expr */ bool ec_below_outer_join; /* equivalence applies below an OJ */ @@ -933,6 +936,22 @@ typedef struct EquivalenceClass struct EquivalenceClass *ec_merged; /* set if merged into another EC */ } EquivalenceClass; +typedef struct EquivalenceClassIndex +{ + NodeTag type; + + EquivalenceClass **ei_classes; /* an array of EquivalenceClasses */ + Bitmapset **ei_index; /* an array, indexed by relid contining + * a bitmapset of each ei_classes item + * which has a member belonging to this + * relation */ + int ei_flags; +} EquivalenceClassIndex; + +#define EQUIVALANCE_IDX_NON_CONST 1 +#define EQUIVALANCE_IDX_NON_VOLATILE 2 +#define EQUIVALANCE_IDX_MULTI_MEMBER 4 + /* * If an EC contains a const and isn't below-outer-join, any PathKey depending * on it must be redundant, since there's only one possible value of the key. diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 040335a..7c81239 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -169,6 +169,9 @@ extern bool eclass_useful_for_merging(PlannerInfo *root, extern bool is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist); extern bool is_redundant_with_indexclauses(RestrictInfo *rinfo, List *indexclauses); +extern EquivalenceClassIndex *create_eclass_index(PlannerInfo *root, + int flags); +extern void free_eclass_index(EquivalenceClassIndex *eci); /* * pathkeys.c