Heikki Linnakangas írta:
> On 26.10.2010 18:34, Boszormenyi Zoltan wrote:
>> thank you very much for pointing me to dynahash, here is the
>> next version that finally seems to work.
>>
>> Two patches are attached, the first is the absolute minimum for
>> making it work, this still has the Tree type for canon_pathkeys
>> and eq_classes got the same treatment as join_rel_list/join_rel_hash
>> has in the current sources: if the list grows larger than 32, a hash
>> table
>> is created. It seems to be be enough for doing in for
>>       get_eclass_for_sort_expr()
>> only, the other users of eq_classes aren't bothered by this change.
>
> That's better, but can't you use dynahash for canon_pathkeys as well?

Here's a purely dynahash solution. It's somewhat slower than
the tree version, 0.45 vs 0.41 seconds in the cached case for the
previously posted test case.

Best regards,
Zoltán Böszörményi

-- 
----------------------------------
Zoltán Böszörményi
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
     http://www.postgresql.at/

diff -durpN postgresql.orig/src/backend/optimizer/path/equivclass.c postgresql.1/src/backend/optimizer/path/equivclass.c
--- postgresql.orig/src/backend/optimizer/path/equivclass.c	2010-10-15 10:31:47.000000000 +0200
+++ postgresql.1/src/backend/optimizer/path/equivclass.c	2010-10-26 17:01:57.000000000 +0200
@@ -24,6 +24,7 @@
 #include "optimizer/planmain.h"
 #include "optimizer/prep.h"
 #include "optimizer/var.h"
+#include "utils/hsearch.h"
 #include "utils/lsyscache.h"
 
 
@@ -360,75 +361,103 @@ add_eq_member(EquivalenceClass *ec, Expr
 
 
 /*
- * get_eclass_for_sort_expr
- *	  Given an expression and opfamily info, find an existing equivalence
- *	  class it is a member of; if none, build a new single-member
- *	  EquivalenceClass for it.
- *
- * sortref is the SortGroupRef of the originating SortGroupClause, if any,
- * or zero if not.	(It should never be zero if the expression is volatile!)
- *
- * This can be used safely both before and after EquivalenceClass merging;
- * since it never causes merging it does not invalidate any existing ECs
- * or PathKeys.
- *
- * Note: opfamilies must be chosen consistently with the way
- * process_equivalence() would do; that is, generated from a mergejoinable
- * equality operator.  Else we might fail to detect valid equivalences,
- * generating poor (but not incorrect) plans.
+ * eq_classes_match - matching function for eq_classes_hash in PlannerInfo
  */
-EquivalenceClass *
-get_eclass_for_sort_expr(PlannerInfo *root,
-						 Expr *expr,
-						 Oid expr_datatype,
-						 List *opfamilies,
-						 Index sortref)
+static int
+eq_classes_match(const void *key1, const void *key2, Size keysize)
 {
-	EquivalenceClass *newec;
-	EquivalenceMember *newem;
+	EquivalenceClass *ec1 = (EquivalenceClass *) key1; /* this is in the hashtable */
+	EquivalenceClass *ec2 = (EquivalenceClass *) key2; /* this is the new matched entry */
 	ListCell   *lc1;
-	MemoryContext oldcontext;
+	ListCell   *lc2;
 
 	/*
-	 * Scan through the existing EquivalenceClasses for a match
+	 * Never match to a volatile EC, except when we are looking at another
+	 * reference to the same volatile SortGroupClause.
 	 */
-	foreach(lc1, root->eq_classes)
+	if (ec1->ec_has_volatile &&
+		(ec2->ec_sortref == 0 || ec2->ec_sortref != ec1->ec_sortref))
+		return 1;
+
+	if (!equal(ec1->ec_opfamilies, ec2->ec_opfamilies))
+		return 1;
+
+	foreach(lc1, ec1->ec_members)
 	{
-		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
-		ListCell   *lc2;
+		EquivalenceMember *em1 = (EquivalenceMember *) lfirst(lc1);
 
 		/*
-		 * Never match to a volatile EC, except when we are looking at another
-		 * reference to the same volatile SortGroupClause.
+		 * If below an outer join, don't match constants: they're not as
+		 * constant as they look.
 		 */
-		if (cur_ec->ec_has_volatile &&
-			(sortref == 0 || sortref != cur_ec->ec_sortref))
-			continue;
-
-		if (!equal(opfamilies, cur_ec->ec_opfamilies))
+		if (ec1->ec_below_outer_join &&
+			em1->em_is_const)
 			continue;
 
-		foreach(lc2, cur_ec->ec_members)
+		foreach(lc2, ec2->ec_members)
 		{
-			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
-
-			/*
-			 * If below an outer join, don't match constants: they're not as
-			 * constant as they look.
-			 */
-			if (cur_ec->ec_below_outer_join &&
-				cur_em->em_is_const)
-				continue;
+			EquivalenceMember *em2 = (EquivalenceMember *) lfirst(lc2);
 
-			if (expr_datatype == cur_em->em_datatype &&
-				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			if (em1->em_datatype == em2->em_datatype &&
+				equal(em1->em_expr, em2->em_expr))
+				return 0;
 		}
 	}
 
+	return 1;
+}
+
+
+/*
+ * build_eq_classes_hash
+ *	Build the initial contents of PlannerInfo.eq_classes_hash
+ *	for faster search in PlannerInfo.eq_classes. This is used
+ *	to  make   get_eclass_for_sort_expr()  faster  for  large
+ *	inheritance trees.
+ */
+static void
+build_eq_classes_hash(PlannerInfo *root)
+{
+	MemoryContext	oldcontext;
+	HASHCTL	info;
+	
+	ListCell   *lc;
+
+	info.match = eq_classes_match;
+	info.hcxt = root->planner_cxt;
+	info.keysize = sizeof(Relids);
+	info.entrysize = sizeof(EquivalenceClass);
+
+	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+	root->eq_classes_hash = hash_create("eq_classes", 2048, &info,
+						HASH_ELEM | HASH_COMPARE | HASH_CONTEXT);
+
+	foreach(lc, root->eq_classes)
+	{
+		EquivalenceClass *ec = lfirst(lc);
+		bool	found;
+
+		hash_search_with_hash_value(root->eq_classes_hash, ec,
+								bms_hash_value(ec->ec_relids),
+								HASH_ENTER, &found);
+		Assert(!found);
+	}
+}
+
+
+static EquivalenceClass *
+build_new_ec(PlannerInfo *root,
+						 Expr *expr,
+						 Oid expr_datatype,
+						 List *opfamilies,
+						 Index sortref)
+{
+	MemoryContext	oldcontext;
+	EquivalenceClass *newec;
+	EquivalenceMember *newem;
+
 	/*
-	 * No match, so build a new single-member EC
-	 *
 	 * Here, we must be sure that we construct the EC in the right context. We
 	 * can assume, however, that the passed expr is long-lived.
 	 */
@@ -471,13 +500,132 @@ get_eclass_for_sort_expr(PlannerInfo *ro
 		}
 	}
 
-	root->eq_classes = lappend(root->eq_classes, newec);
-
 	MemoryContextSwitchTo(oldcontext);
 
 	return newec;
 }
 
+/*
+ * get_eclass_for_sort_expr
+ *	  Given an expression and opfamily info, find an existing equivalence
+ *	  class it is a member of; if none, build a new single-member
+ *	  EquivalenceClass for it.
+ *
+ * sortref is the SortGroupRef of the originating SortGroupClause, if any,
+ * or zero if not.	(It should never be zero if the expression is volatile!)
+ *
+ * This can be used safely both before and after EquivalenceClass merging;
+ * since it never causes merging it does not invalidate any existing ECs
+ * or PathKeys.
+ *
+ * Note: opfamilies must be chosen consistently with the way
+ * process_equivalence() would do; that is, generated from a mergejoinable
+ * equality operator.  Else we might fail to detect valid equivalences,
+ * generating poor (but not incorrect) plans.
+ */
+EquivalenceClass *
+get_eclass_for_sort_expr(PlannerInfo *root,
+						 Expr *expr,
+						 Oid expr_datatype,
+						 List *opfamilies,
+						 Index sortref)
+{
+	EquivalenceClass *newec;
+	ListCell   *lc1;
+	MemoryContext oldcontext;
+
+	if (root->eq_classes_hash == NULL &&
+		list_length(root->eq_classes) > 32)
+		build_eq_classes_hash(root);
+
+	if (root->eq_classes_hash == NULL)
+	{
+		/*
+		 * Scan through the existing EquivalenceClasses for a match
+		 */
+		foreach(lc1, root->eq_classes)
+		{
+			EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+			ListCell   *lc2;
+
+			/*
+			 * Never match to a volatile EC, except when we are looking at another
+			 * reference to the same volatile SortGroupClause.
+			 */
+			if (cur_ec->ec_has_volatile &&
+				(sortref == 0 || sortref != cur_ec->ec_sortref))
+				continue;
+
+			if (!equal(opfamilies, cur_ec->ec_opfamilies))
+				continue;
+
+			foreach(lc2, cur_ec->ec_members)
+			{
+				EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+
+				/*
+				 * If below an outer join, don't match constants: they're not as
+				 * constant as they look.
+				 */
+				if (cur_ec->ec_below_outer_join &&
+					cur_em->em_is_const)
+					continue;
+
+				if (expr_datatype == cur_em->em_datatype &&
+					equal(expr, cur_em->em_expr))
+					return cur_ec;	/* Match! */
+			}
+		}
+
+		/*
+		 * No match, so build a new single-member EC
+		 */
+		newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref);
+
+		oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+		root->eq_classes = lappend(root->eq_classes, newec);
+		MemoryContextSwitchTo(oldcontext);
+
+		return newec;
+	}
+	else
+	{
+		EquivalenceClass *ec_found;
+		bool	found;
+		uint32	hashval;
+
+		/*
+		 * Build the new single-member EC to match against in hash_search()
+		 */
+		newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref);
+
+		hashval = bms_hash_value(newec->ec_relids);
+
+		ec_found = hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_FIND, &found);
+
+		if (found)
+		{
+			list_free(newec->ec_opfamilies);
+			list_free_deep(newec->ec_members);
+			bms_free(newec->ec_relids);
+			pfree(newec);
+			return ec_found;
+		}
+
+		oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+		root->eq_classes = lappend(root->eq_classes, newec);
+
+		hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_ENTER, &found);
+
+		Assert(!found);
+
+		MemoryContextSwitchTo(oldcontext);
+
+		return newec;
+	}
+}
+
 
 /*
  * generate_base_implied_equalities
diff -durpN postgresql.orig/src/backend/optimizer/path/pathkeys.c postgresql.1/src/backend/optimizer/path/pathkeys.c
--- postgresql.orig/src/backend/optimizer/path/pathkeys.c	2010-09-21 13:49:57.000000000 +0200
+++ postgresql.1/src/backend/optimizer/path/pathkeys.c	2010-10-28 11:19:35.000000000 +0200
@@ -17,6 +17,7 @@
  */
 #include "postgres.h"
 
+#include "access/hash.h"
 #include "access/skey.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -27,6 +28,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "parser/parsetree.h"
+#include "utils/hsearch.h"
 #include "utils/lsyscache.h"
 
 
@@ -72,6 +74,73 @@ makePathKey(EquivalenceClass *eclass, Oi
 }
 
 /*
+ * pk_hash
+ *		hashtable hash function for PlannerInfo.canon_pathkeys_hash
+ */
+static uint32
+pk_hash(const void *key, Size keysize)
+{
+	PathKey	   *pk = (PathKey *) key;
+	intptr_t	ptr = (intptr_t) pk->pk_eclass;
+
+	return DatumGetUInt32(hash_uint32((uint32)ptr));
+}
+
+/*
+ * pk_match
+ *		hashtable match function for PlannerInfo.canon_pathkeys_hash
+ */
+static int
+pk_match(const void *key1, const void *key2, Size keysize)
+{
+	PathKey	   *pk1 = (PathKey *)key1;
+	PathKey	   *pk2 = (PathKey *)key2;
+
+	if (pk1->pk_eclass == pk2->pk_eclass &&
+		pk1->pk_opfamily == pk2->pk_opfamily &&
+		pk1->pk_strategy == pk2->pk_strategy &&
+		pk1->pk_nulls_first == pk2->pk_nulls_first)
+		return 0;
+	return 1;
+}
+
+/*
+ * build_canonical_pathkey_hash
+ *
+ * Build PlannerInfo.canon_pathkeys_hash from canon_pathkeys list.
+ */
+static void
+build_canonical_pathkey_hash(PlannerInfo *root)
+{
+	MemoryContext	oldcontext;
+	HASHCTL		info;
+
+	ListCell   *lc;
+
+	info.hash = pk_hash;
+	info.match = pk_match;
+	info.hcxt = root->planner_cxt;
+	info.keysize = sizeof(EquivalenceClass *);
+	info.entrysize = sizeof(PathKey);
+
+	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+	root->canon_pathkeys_hash = hash_create("canon_pathkeys", 2048, &info,
+							HASH_FUNCTION | HASH_ELEM | HASH_COMPARE | HASH_CONTEXT);
+
+	foreach(lc, root->canon_pathkeys)
+	{
+		PathKey *pk = lfirst(lc);
+		bool	found;
+
+		hash_search_with_hash_value(root->canon_pathkeys_hash, pk,
+							DatumGetUInt32(hash_any((const unsigned char *) pk, sizeof(PathKey))),
+							HASH_ENTER, &found);
+		Assert(!found);
+	}
+}
+
+/*
  * make_canonical_pathkey
  *	  Given the parameters for a PathKey, find any pre-existing matching
  *	  pathkey in the query's list of "canonical" pathkeys.  Make a new
@@ -85,7 +154,7 @@ make_canonical_pathkey(PlannerInfo *root
 					   EquivalenceClass *eclass, Oid opfamily,
 					   int strategy, bool nulls_first)
 {
-	PathKey    *pk;
+	PathKey    *pk, *pk_found;
 	ListCell   *lc;
 	MemoryContext oldcontext;
 
@@ -93,26 +162,64 @@ make_canonical_pathkey(PlannerInfo *root
 	while (eclass->ec_merged)
 		eclass = eclass->ec_merged;
 
-	foreach(lc, root->canon_pathkeys)
+	if (root->canon_pathkeys_hash == NULL &&
+		list_length(root->canon_pathkeys) > 32)
+		build_canonical_pathkey_hash(root);
+
+	if (root->canon_pathkeys_hash == NULL)
 	{
-		pk = (PathKey *) lfirst(lc);
-		if (eclass == pk->pk_eclass &&
-			opfamily == pk->pk_opfamily &&
-			strategy == pk->pk_strategy &&
-			nulls_first == pk->pk_nulls_first)
-			return pk;
+		foreach(lc, root->canon_pathkeys)
+		{
+			pk = (PathKey *) lfirst(lc);
+			if (eclass == pk->pk_eclass &&
+				opfamily == pk->pk_opfamily &&
+				strategy == pk->pk_strategy &&
+				nulls_first == pk->pk_nulls_first)
+				return pk;
+		}
+
+		/*
+		 * Be sure canonical pathkeys are allocated in the main planning context.
+		 * Not an issue in normal planning, but it is for GEQO.
+		 */
+		oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+		pk = makePathKey(eclass, opfamily, strategy, nulls_first);
+		root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+
+		MemoryContextSwitchTo(oldcontext);
 	}
+	else
+	{
+		uint32	hashval;
+		bool	found;
 
-	/*
-	 * Be sure canonical pathkeys are allocated in the main planning context.
-	 * Not an issue in normal planning, but it is for GEQO.
-	 */
-	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+		/*
+		 * Be sure canonical pathkeys are allocated in the main planning context.
+		 * Not an issue in normal planning, but it is for GEQO.
+		 */
+		oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 
-	pk = makePathKey(eclass, opfamily, strategy, nulls_first);
-	root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+		pk = makePathKey(eclass, opfamily, strategy, nulls_first);
 
-	MemoryContextSwitchTo(oldcontext);
+		hashval = DatumGetInt32(hash_any((const unsigned char *) pk, sizeof(PathKey)));
+
+
+		pk_found = hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_FIND, &found);
+
+		if (found)
+		{
+			pfree(pk);
+			pk = pk_found;
+		}
+		else
+		{
+			root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+			hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_ENTER, &found);
+		}
+
+		MemoryContextSwitchTo(oldcontext);
+	}
 
 	return pk;
 }
diff -durpN postgresql.orig/src/backend/optimizer/plan/planmain.c postgresql.1/src/backend/optimizer/plan/planmain.c
--- postgresql.orig/src/backend/optimizer/plan/planmain.c	2010-10-08 11:04:23.000000000 +0200
+++ postgresql.1/src/backend/optimizer/plan/planmain.c	2010-10-28 09:35:37.000000000 +0200
@@ -27,6 +27,7 @@
 #include "optimizer/placeholder.h"
 #include "optimizer/planmain.h"
 #include "optimizer/tlist.h"
+#include "utils/hsearch.h"
 #include "utils/selfuncs.h"
 
 
@@ -118,6 +119,11 @@ query_planner(PlannerInfo *root, List *t
 		 * something like "SELECT 2+2 ORDER BY 1".
 		 */
 		root->canon_pathkeys = NIL;
+		if (root->canon_pathkeys_hash)
+		{
+			hash_destroy(root->canon_pathkeys_hash);
+			root->canon_pathkeys_hash = NULL;
+		}
 		root->query_pathkeys = canonicalize_pathkeys(root,
 													 root->query_pathkeys);
 		root->group_pathkeys = canonicalize_pathkeys(root,
@@ -146,6 +152,11 @@ query_planner(PlannerInfo *root, List *t
 	root->join_rel_level = NULL;
 	root->join_cur_level = 0;
 	root->canon_pathkeys = NIL;
+	if (root->canon_pathkeys_hash)
+	{
+		hash_destroy(root->canon_pathkeys_hash);
+		root->canon_pathkeys_hash = NULL;
+	}
 	root->left_join_clauses = NIL;
 	root->right_join_clauses = NIL;
 	root->full_join_clauses = NIL;
diff -durpN postgresql.orig/src/include/nodes/relation.h postgresql.1/src/include/nodes/relation.h
--- postgresql.orig/src/include/nodes/relation.h	2010-10-15 10:31:47.000000000 +0200
+++ postgresql.1/src/include/nodes/relation.h	2010-10-28 11:26:02.000000000 +0200
@@ -159,8 +159,10 @@ typedef struct PlannerInfo
 	List	   *cte_plan_ids;	/* per-CTE-item list of subplan IDs */
 
 	List	   *eq_classes;		/* list of active EquivalenceClasses */
+	struct HTAB *eq_classes_hash;	/* optional hashtable for equivalence classes */
 
 	List	   *canon_pathkeys; /* list of "canonical" PathKeys */
+	struct HTAB *canon_pathkeys_hash;	/* optional hashtable for "canonical" PathKeys */
 
 	List	   *left_join_clauses;		/* list of RestrictInfos for
 										 * mergejoinable outer join clauses
Binary files postgresql.orig/src/timezone/gmon.out and postgresql.1/src/timezone/gmon.out differ
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to