Heikki Linnakangas írta:
> On 26.10.2010 18:34, Boszormenyi Zoltan wrote:
>> thank you very much for pointing me to dynahash, here is the
>> next version that finally seems to work.
>>
>> Two patches are attached, the first is the absolute minimum for
>> making it work, this still has the Tree type for canon_pathkeys
>> and eq_classes got the same treatment as join_rel_list/join_rel_hash
>> has in the current sources: if the list grows larger than 32, a hash
>> table
>> is created. It seems to be be enough for doing in for
>> get_eclass_for_sort_expr()
>> only, the other users of eq_classes aren't bothered by this change.
>
> That's better, but can't you use dynahash for canon_pathkeys as well?
Here's a purely dynahash solution. It's somewhat slower than
the tree version, 0.45 vs 0.41 seconds in the cached case for the
previously posted test case.
Best regards,
Zoltán Böszörményi
--
----------------------------------
Zoltán Böszörményi
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
http://www.postgresql.at/
diff -durpN postgresql.orig/src/backend/optimizer/path/equivclass.c postgresql.1/src/backend/optimizer/path/equivclass.c
--- postgresql.orig/src/backend/optimizer/path/equivclass.c 2010-10-15 10:31:47.000000000 +0200
+++ postgresql.1/src/backend/optimizer/path/equivclass.c 2010-10-26 17:01:57.000000000 +0200
@@ -24,6 +24,7 @@
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/var.h"
+#include "utils/hsearch.h"
#include "utils/lsyscache.h"
@@ -360,75 +361,103 @@ add_eq_member(EquivalenceClass *ec, Expr
/*
- * get_eclass_for_sort_expr
- * Given an expression and opfamily info, find an existing equivalence
- * class it is a member of; if none, build a new single-member
- * EquivalenceClass for it.
- *
- * sortref is the SortGroupRef of the originating SortGroupClause, if any,
- * or zero if not. (It should never be zero if the expression is volatile!)
- *
- * This can be used safely both before and after EquivalenceClass merging;
- * since it never causes merging it does not invalidate any existing ECs
- * or PathKeys.
- *
- * Note: opfamilies must be chosen consistently with the way
- * process_equivalence() would do; that is, generated from a mergejoinable
- * equality operator. Else we might fail to detect valid equivalences,
- * generating poor (but not incorrect) plans.
+ * eq_classes_match - matching function for eq_classes_hash in PlannerInfo
*/
-EquivalenceClass *
-get_eclass_for_sort_expr(PlannerInfo *root,
- Expr *expr,
- Oid expr_datatype,
- List *opfamilies,
- Index sortref)
+static int
+eq_classes_match(const void *key1, const void *key2, Size keysize)
{
- EquivalenceClass *newec;
- EquivalenceMember *newem;
+ EquivalenceClass *ec1 = (EquivalenceClass *) key1; /* this is in the hashtable */
+ EquivalenceClass *ec2 = (EquivalenceClass *) key2; /* this is the new matched entry */
ListCell *lc1;
- MemoryContext oldcontext;
+ ListCell *lc2;
/*
- * Scan through the existing EquivalenceClasses for a match
+ * Never match to a volatile EC, except when we are looking at another
+ * reference to the same volatile SortGroupClause.
*/
- foreach(lc1, root->eq_classes)
+ if (ec1->ec_has_volatile &&
+ (ec2->ec_sortref == 0 || ec2->ec_sortref != ec1->ec_sortref))
+ return 1;
+
+ if (!equal(ec1->ec_opfamilies, ec2->ec_opfamilies))
+ return 1;
+
+ foreach(lc1, ec1->ec_members)
{
- EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
- ListCell *lc2;
+ EquivalenceMember *em1 = (EquivalenceMember *) lfirst(lc1);
/*
- * Never match to a volatile EC, except when we are looking at another
- * reference to the same volatile SortGroupClause.
+ * If below an outer join, don't match constants: they're not as
+ * constant as they look.
*/
- if (cur_ec->ec_has_volatile &&
- (sortref == 0 || sortref != cur_ec->ec_sortref))
- continue;
-
- if (!equal(opfamilies, cur_ec->ec_opfamilies))
+ if (ec1->ec_below_outer_join &&
+ em1->em_is_const)
continue;
- foreach(lc2, cur_ec->ec_members)
+ foreach(lc2, ec2->ec_members)
{
- EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
-
- /*
- * If below an outer join, don't match constants: they're not as
- * constant as they look.
- */
- if (cur_ec->ec_below_outer_join &&
- cur_em->em_is_const)
- continue;
+ EquivalenceMember *em2 = (EquivalenceMember *) lfirst(lc2);
- if (expr_datatype == cur_em->em_datatype &&
- equal(expr, cur_em->em_expr))
- return cur_ec; /* Match! */
+ if (em1->em_datatype == em2->em_datatype &&
+ equal(em1->em_expr, em2->em_expr))
+ return 0;
}
}
+ return 1;
+}
+
+
+/*
+ * build_eq_classes_hash
+ * Build the initial contents of PlannerInfo.eq_classes_hash
+ * for faster search in PlannerInfo.eq_classes. This is used
+ * to make get_eclass_for_sort_expr() faster for large
+ * inheritance trees.
+ */
+static void
+build_eq_classes_hash(PlannerInfo *root)
+{
+ MemoryContext oldcontext;
+ HASHCTL info;
+
+ ListCell *lc;
+
+ info.match = eq_classes_match;
+ info.hcxt = root->planner_cxt;
+ info.keysize = sizeof(Relids);
+ info.entrysize = sizeof(EquivalenceClass);
+
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ root->eq_classes_hash = hash_create("eq_classes", 2048, &info,
+ HASH_ELEM | HASH_COMPARE | HASH_CONTEXT);
+
+ foreach(lc, root->eq_classes)
+ {
+ EquivalenceClass *ec = lfirst(lc);
+ bool found;
+
+ hash_search_with_hash_value(root->eq_classes_hash, ec,
+ bms_hash_value(ec->ec_relids),
+ HASH_ENTER, &found);
+ Assert(!found);
+ }
+}
+
+
+static EquivalenceClass *
+build_new_ec(PlannerInfo *root,
+ Expr *expr,
+ Oid expr_datatype,
+ List *opfamilies,
+ Index sortref)
+{
+ MemoryContext oldcontext;
+ EquivalenceClass *newec;
+ EquivalenceMember *newem;
+
/*
- * No match, so build a new single-member EC
- *
* Here, we must be sure that we construct the EC in the right context. We
* can assume, however, that the passed expr is long-lived.
*/
@@ -471,13 +500,132 @@ get_eclass_for_sort_expr(PlannerInfo *ro
}
}
- root->eq_classes = lappend(root->eq_classes, newec);
-
MemoryContextSwitchTo(oldcontext);
return newec;
}
+/*
+ * get_eclass_for_sort_expr
+ * Given an expression and opfamily info, find an existing equivalence
+ * class it is a member of; if none, build a new single-member
+ * EquivalenceClass for it.
+ *
+ * sortref is the SortGroupRef of the originating SortGroupClause, if any,
+ * or zero if not. (It should never be zero if the expression is volatile!)
+ *
+ * This can be used safely both before and after EquivalenceClass merging;
+ * since it never causes merging it does not invalidate any existing ECs
+ * or PathKeys.
+ *
+ * Note: opfamilies must be chosen consistently with the way
+ * process_equivalence() would do; that is, generated from a mergejoinable
+ * equality operator. Else we might fail to detect valid equivalences,
+ * generating poor (but not incorrect) plans.
+ */
+EquivalenceClass *
+get_eclass_for_sort_expr(PlannerInfo *root,
+ Expr *expr,
+ Oid expr_datatype,
+ List *opfamilies,
+ Index sortref)
+{
+ EquivalenceClass *newec;
+ ListCell *lc1;
+ MemoryContext oldcontext;
+
+ if (root->eq_classes_hash == NULL &&
+ list_length(root->eq_classes) > 32)
+ build_eq_classes_hash(root);
+
+ if (root->eq_classes_hash == NULL)
+ {
+ /*
+ * Scan through the existing EquivalenceClasses for a match
+ */
+ foreach(lc1, root->eq_classes)
+ {
+ EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ ListCell *lc2;
+
+ /*
+ * Never match to a volatile EC, except when we are looking at another
+ * reference to the same volatile SortGroupClause.
+ */
+ if (cur_ec->ec_has_volatile &&
+ (sortref == 0 || sortref != cur_ec->ec_sortref))
+ continue;
+
+ if (!equal(opfamilies, cur_ec->ec_opfamilies))
+ continue;
+
+ foreach(lc2, cur_ec->ec_members)
+ {
+ EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+ /*
+ * If below an outer join, don't match constants: they're not as
+ * constant as they look.
+ */
+ if (cur_ec->ec_below_outer_join &&
+ cur_em->em_is_const)
+ continue;
+
+ if (expr_datatype == cur_em->em_datatype &&
+ equal(expr, cur_em->em_expr))
+ return cur_ec; /* Match! */
+ }
+ }
+
+ /*
+ * No match, so build a new single-member EC
+ */
+ newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref);
+
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+ root->eq_classes = lappend(root->eq_classes, newec);
+ MemoryContextSwitchTo(oldcontext);
+
+ return newec;
+ }
+ else
+ {
+ EquivalenceClass *ec_found;
+ bool found;
+ uint32 hashval;
+
+ /*
+ * Build the new single-member EC to match against in hash_search()
+ */
+ newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref);
+
+ hashval = bms_hash_value(newec->ec_relids);
+
+ ec_found = hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_FIND, &found);
+
+ if (found)
+ {
+ list_free(newec->ec_opfamilies);
+ list_free_deep(newec->ec_members);
+ bms_free(newec->ec_relids);
+ pfree(newec);
+ return ec_found;
+ }
+
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ root->eq_classes = lappend(root->eq_classes, newec);
+
+ hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_ENTER, &found);
+
+ Assert(!found);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return newec;
+ }
+}
+
/*
* generate_base_implied_equalities
diff -durpN postgresql.orig/src/backend/optimizer/path/pathkeys.c postgresql.1/src/backend/optimizer/path/pathkeys.c
--- postgresql.orig/src/backend/optimizer/path/pathkeys.c 2010-09-21 13:49:57.000000000 +0200
+++ postgresql.1/src/backend/optimizer/path/pathkeys.c 2010-10-28 11:19:35.000000000 +0200
@@ -17,6 +17,7 @@
*/
#include "postgres.h"
+#include "access/hash.h"
#include "access/skey.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
@@ -27,6 +28,7 @@
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
#include "parser/parsetree.h"
+#include "utils/hsearch.h"
#include "utils/lsyscache.h"
@@ -72,6 +74,73 @@ makePathKey(EquivalenceClass *eclass, Oi
}
/*
+ * pk_hash
+ * hashtable hash function for PlannerInfo.canon_pathkeys_hash
+ */
+static uint32
+pk_hash(const void *key, Size keysize)
+{
+ PathKey *pk = (PathKey *) key;
+ intptr_t ptr = (intptr_t) pk->pk_eclass;
+
+ return DatumGetUInt32(hash_uint32((uint32)ptr));
+}
+
+/*
+ * pk_match
+ * hashtable match function for PlannerInfo.canon_pathkeys_hash
+ */
+static int
+pk_match(const void *key1, const void *key2, Size keysize)
+{
+ PathKey *pk1 = (PathKey *)key1;
+ PathKey *pk2 = (PathKey *)key2;
+
+ if (pk1->pk_eclass == pk2->pk_eclass &&
+ pk1->pk_opfamily == pk2->pk_opfamily &&
+ pk1->pk_strategy == pk2->pk_strategy &&
+ pk1->pk_nulls_first == pk2->pk_nulls_first)
+ return 0;
+ return 1;
+}
+
+/*
+ * build_canonical_pathkey_hash
+ *
+ * Build PlannerInfo.canon_pathkeys_hash from canon_pathkeys list.
+ */
+static void
+build_canonical_pathkey_hash(PlannerInfo *root)
+{
+ MemoryContext oldcontext;
+ HASHCTL info;
+
+ ListCell *lc;
+
+ info.hash = pk_hash;
+ info.match = pk_match;
+ info.hcxt = root->planner_cxt;
+ info.keysize = sizeof(EquivalenceClass *);
+ info.entrysize = sizeof(PathKey);
+
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ root->canon_pathkeys_hash = hash_create("canon_pathkeys", 2048, &info,
+ HASH_FUNCTION | HASH_ELEM | HASH_COMPARE | HASH_CONTEXT);
+
+ foreach(lc, root->canon_pathkeys)
+ {
+ PathKey *pk = lfirst(lc);
+ bool found;
+
+ hash_search_with_hash_value(root->canon_pathkeys_hash, pk,
+ DatumGetUInt32(hash_any((const unsigned char *) pk, sizeof(PathKey))),
+ HASH_ENTER, &found);
+ Assert(!found);
+ }
+}
+
+/*
* make_canonical_pathkey
* Given the parameters for a PathKey, find any pre-existing matching
* pathkey in the query's list of "canonical" pathkeys. Make a new
@@ -85,7 +154,7 @@ make_canonical_pathkey(PlannerInfo *root
EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first)
{
- PathKey *pk;
+ PathKey *pk, *pk_found;
ListCell *lc;
MemoryContext oldcontext;
@@ -93,26 +162,64 @@ make_canonical_pathkey(PlannerInfo *root
while (eclass->ec_merged)
eclass = eclass->ec_merged;
- foreach(lc, root->canon_pathkeys)
+ if (root->canon_pathkeys_hash == NULL &&
+ list_length(root->canon_pathkeys) > 32)
+ build_canonical_pathkey_hash(root);
+
+ if (root->canon_pathkeys_hash == NULL)
{
- pk = (PathKey *) lfirst(lc);
- if (eclass == pk->pk_eclass &&
- opfamily == pk->pk_opfamily &&
- strategy == pk->pk_strategy &&
- nulls_first == pk->pk_nulls_first)
- return pk;
+ foreach(lc, root->canon_pathkeys)
+ {
+ pk = (PathKey *) lfirst(lc);
+ if (eclass == pk->pk_eclass &&
+ opfamily == pk->pk_opfamily &&
+ strategy == pk->pk_strategy &&
+ nulls_first == pk->pk_nulls_first)
+ return pk;
+ }
+
+ /*
+ * Be sure canonical pathkeys are allocated in the main planning context.
+ * Not an issue in normal planning, but it is for GEQO.
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ pk = makePathKey(eclass, opfamily, strategy, nulls_first);
+ root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+
+ MemoryContextSwitchTo(oldcontext);
}
+ else
+ {
+ uint32 hashval;
+ bool found;
- /*
- * Be sure canonical pathkeys are allocated in the main planning context.
- * Not an issue in normal planning, but it is for GEQO.
- */
- oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+ /*
+ * Be sure canonical pathkeys are allocated in the main planning context.
+ * Not an issue in normal planning, but it is for GEQO.
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
- pk = makePathKey(eclass, opfamily, strategy, nulls_first);
- root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+ pk = makePathKey(eclass, opfamily, strategy, nulls_first);
- MemoryContextSwitchTo(oldcontext);
+ hashval = DatumGetInt32(hash_any((const unsigned char *) pk, sizeof(PathKey)));
+
+
+ pk_found = hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_FIND, &found);
+
+ if (found)
+ {
+ pfree(pk);
+ pk = pk_found;
+ }
+ else
+ {
+ root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+ hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_ENTER, &found);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ }
return pk;
}
diff -durpN postgresql.orig/src/backend/optimizer/plan/planmain.c postgresql.1/src/backend/optimizer/plan/planmain.c
--- postgresql.orig/src/backend/optimizer/plan/planmain.c 2010-10-08 11:04:23.000000000 +0200
+++ postgresql.1/src/backend/optimizer/plan/planmain.c 2010-10-28 09:35:37.000000000 +0200
@@ -27,6 +27,7 @@
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
#include "optimizer/tlist.h"
+#include "utils/hsearch.h"
#include "utils/selfuncs.h"
@@ -118,6 +119,11 @@ query_planner(PlannerInfo *root, List *t
* something like "SELECT 2+2 ORDER BY 1".
*/
root->canon_pathkeys = NIL;
+ if (root->canon_pathkeys_hash)
+ {
+ hash_destroy(root->canon_pathkeys_hash);
+ root->canon_pathkeys_hash = NULL;
+ }
root->query_pathkeys = canonicalize_pathkeys(root,
root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root,
@@ -146,6 +152,11 @@ query_planner(PlannerInfo *root, List *t
root->join_rel_level = NULL;
root->join_cur_level = 0;
root->canon_pathkeys = NIL;
+ if (root->canon_pathkeys_hash)
+ {
+ hash_destroy(root->canon_pathkeys_hash);
+ root->canon_pathkeys_hash = NULL;
+ }
root->left_join_clauses = NIL;
root->right_join_clauses = NIL;
root->full_join_clauses = NIL;
diff -durpN postgresql.orig/src/include/nodes/relation.h postgresql.1/src/include/nodes/relation.h
--- postgresql.orig/src/include/nodes/relation.h 2010-10-15 10:31:47.000000000 +0200
+++ postgresql.1/src/include/nodes/relation.h 2010-10-28 11:26:02.000000000 +0200
@@ -159,8 +159,10 @@ typedef struct PlannerInfo
List *cte_plan_ids; /* per-CTE-item list of subplan IDs */
List *eq_classes; /* list of active EquivalenceClasses */
+ struct HTAB *eq_classes_hash; /* optional hashtable for equivalence classes */
List *canon_pathkeys; /* list of "canonical" PathKeys */
+ struct HTAB *canon_pathkeys_hash; /* optional hashtable for "canonical" PathKeys */
List *left_join_clauses; /* list of RestrictInfos for
* mergejoinable outer join clauses
Binary files postgresql.orig/src/timezone/gmon.out and postgresql.1/src/timezone/gmon.out differ
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers