Boszormenyi Zoltan írta:
> Heikki Linnakangas írta:
>   
>> On 26.10.2010 18:34, Boszormenyi Zoltan wrote:
>>     
>>> thank you very much for pointing me to dynahash, here is the
>>> next version that finally seems to work.
>>>
>>> Two patches are attached, the first is the absolute minimum for
>>> making it work, this still has the Tree type for canon_pathkeys
>>> and eq_classes got the same treatment as join_rel_list/join_rel_hash
>>> has in the current sources: if the list grows larger than 32, a hash
>>> table
>>> is created. It seems to be be enough for doing in for
>>>       get_eclass_for_sort_expr()
>>> only, the other users of eq_classes aren't bothered by this change.
>>>       
>> That's better, but can't you use dynahash for canon_pathkeys as well?
>>     
>
> Here's a purely dynahash solution. It's somewhat slower than
> the tree version, 0.45 vs 0.41 seconds in the cached case for the
> previously posted test case.
>   

And now in context diff, sorry for my affection towards unified diffs. :-)

> Best regards,
> Zoltán Böszörményi
>
>   


-- 
----------------------------------
Zoltán Böszörményi
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt, Austria
Web: http://www.postgresql-support.de
     http://www.postgresql.at/

diff -dcrpN postgresql.orig/src/backend/optimizer/path/equivclass.c postgresql.1/src/backend/optimizer/path/equivclass.c
*** postgresql.orig/src/backend/optimizer/path/equivclass.c	2010-10-15 10:31:47.000000000 +0200
--- postgresql.1/src/backend/optimizer/path/equivclass.c	2010-10-26 17:01:57.000000000 +0200
***************
*** 24,29 ****
--- 24,30 ----
  #include "optimizer/planmain.h"
  #include "optimizer/prep.h"
  #include "optimizer/var.h"
+ #include "utils/hsearch.h"
  #include "utils/lsyscache.h"
  
  
*************** add_eq_member(EquivalenceClass *ec, Expr
*** 360,434 ****
  
  
  /*
!  * get_eclass_for_sort_expr
!  *	  Given an expression and opfamily info, find an existing equivalence
!  *	  class it is a member of; if none, build a new single-member
!  *	  EquivalenceClass for it.
!  *
!  * sortref is the SortGroupRef of the originating SortGroupClause, if any,
!  * or zero if not.	(It should never be zero if the expression is volatile!)
!  *
!  * This can be used safely both before and after EquivalenceClass merging;
!  * since it never causes merging it does not invalidate any existing ECs
!  * or PathKeys.
!  *
!  * Note: opfamilies must be chosen consistently with the way
!  * process_equivalence() would do; that is, generated from a mergejoinable
!  * equality operator.  Else we might fail to detect valid equivalences,
!  * generating poor (but not incorrect) plans.
   */
! EquivalenceClass *
! get_eclass_for_sort_expr(PlannerInfo *root,
! 						 Expr *expr,
! 						 Oid expr_datatype,
! 						 List *opfamilies,
! 						 Index sortref)
  {
! 	EquivalenceClass *newec;
! 	EquivalenceMember *newem;
  	ListCell   *lc1;
! 	MemoryContext oldcontext;
  
  	/*
! 	 * Scan through the existing EquivalenceClasses for a match
  	 */
! 	foreach(lc1, root->eq_classes)
  	{
! 		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
! 		ListCell   *lc2;
  
  		/*
! 		 * Never match to a volatile EC, except when we are looking at another
! 		 * reference to the same volatile SortGroupClause.
  		 */
! 		if (cur_ec->ec_has_volatile &&
! 			(sortref == 0 || sortref != cur_ec->ec_sortref))
! 			continue;
! 
! 		if (!equal(opfamilies, cur_ec->ec_opfamilies))
  			continue;
  
! 		foreach(lc2, cur_ec->ec_members)
  		{
! 			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
! 
! 			/*
! 			 * If below an outer join, don't match constants: they're not as
! 			 * constant as they look.
! 			 */
! 			if (cur_ec->ec_below_outer_join &&
! 				cur_em->em_is_const)
! 				continue;
  
! 			if (expr_datatype == cur_em->em_datatype &&
! 				equal(expr, cur_em->em_expr))
! 				return cur_ec;	/* Match! */
  		}
  	}
  
  	/*
- 	 * No match, so build a new single-member EC
- 	 *
  	 * Here, we must be sure that we construct the EC in the right context. We
  	 * can assume, however, that the passed expr is long-lived.
  	 */
--- 361,463 ----
  
  
  /*
!  * eq_classes_match - matching function for eq_classes_hash in PlannerInfo
   */
! static int
! eq_classes_match(const void *key1, const void *key2, Size keysize)
  {
! 	EquivalenceClass *ec1 = (EquivalenceClass *) key1; /* this is in the hashtable */
! 	EquivalenceClass *ec2 = (EquivalenceClass *) key2; /* this is the new matched entry */
  	ListCell   *lc1;
! 	ListCell   *lc2;
  
  	/*
! 	 * Never match to a volatile EC, except when we are looking at another
! 	 * reference to the same volatile SortGroupClause.
  	 */
! 	if (ec1->ec_has_volatile &&
! 		(ec2->ec_sortref == 0 || ec2->ec_sortref != ec1->ec_sortref))
! 		return 1;
! 
! 	if (!equal(ec1->ec_opfamilies, ec2->ec_opfamilies))
! 		return 1;
! 
! 	foreach(lc1, ec1->ec_members)
  	{
! 		EquivalenceMember *em1 = (EquivalenceMember *) lfirst(lc1);
  
  		/*
! 		 * If below an outer join, don't match constants: they're not as
! 		 * constant as they look.
  		 */
! 		if (ec1->ec_below_outer_join &&
! 			em1->em_is_const)
  			continue;
  
! 		foreach(lc2, ec2->ec_members)
  		{
! 			EquivalenceMember *em2 = (EquivalenceMember *) lfirst(lc2);
  
! 			if (em1->em_datatype == em2->em_datatype &&
! 				equal(em1->em_expr, em2->em_expr))
! 				return 0;
  		}
  	}
  
+ 	return 1;
+ }
+ 
+ 
+ /*
+  * build_eq_classes_hash
+  *	Build the initial contents of PlannerInfo.eq_classes_hash
+  *	for faster search in PlannerInfo.eq_classes. This is used
+  *	to  make   get_eclass_for_sort_expr()  faster  for  large
+  *	inheritance trees.
+  */
+ static void
+ build_eq_classes_hash(PlannerInfo *root)
+ {
+ 	MemoryContext	oldcontext;
+ 	HASHCTL	info;
+ 	
+ 	ListCell   *lc;
+ 
+ 	info.match = eq_classes_match;
+ 	info.hcxt = root->planner_cxt;
+ 	info.keysize = sizeof(Relids);
+ 	info.entrysize = sizeof(EquivalenceClass);
+ 
+ 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+ 
+ 	root->eq_classes_hash = hash_create("eq_classes", 2048, &info,
+ 						HASH_ELEM | HASH_COMPARE | HASH_CONTEXT);
+ 
+ 	foreach(lc, root->eq_classes)
+ 	{
+ 		EquivalenceClass *ec = lfirst(lc);
+ 		bool	found;
+ 
+ 		hash_search_with_hash_value(root->eq_classes_hash, ec,
+ 								bms_hash_value(ec->ec_relids),
+ 								HASH_ENTER, &found);
+ 		Assert(!found);
+ 	}
+ }
+ 
+ 
+ static EquivalenceClass *
+ build_new_ec(PlannerInfo *root,
+ 						 Expr *expr,
+ 						 Oid expr_datatype,
+ 						 List *opfamilies,
+ 						 Index sortref)
+ {
+ 	MemoryContext	oldcontext;
+ 	EquivalenceClass *newec;
+ 	EquivalenceMember *newem;
+ 
  	/*
  	 * Here, we must be sure that we construct the EC in the right context. We
  	 * can assume, however, that the passed expr is long-lived.
  	 */
*************** get_eclass_for_sort_expr(PlannerInfo *ro
*** 471,483 ****
  		}
  	}
  
- 	root->eq_classes = lappend(root->eq_classes, newec);
- 
  	MemoryContextSwitchTo(oldcontext);
  
  	return newec;
  }
  
  
  /*
   * generate_base_implied_equalities
--- 500,631 ----
  		}
  	}
  
  	MemoryContextSwitchTo(oldcontext);
  
  	return newec;
  }
  
+ /*
+  * get_eclass_for_sort_expr
+  *	  Given an expression and opfamily info, find an existing equivalence
+  *	  class it is a member of; if none, build a new single-member
+  *	  EquivalenceClass for it.
+  *
+  * sortref is the SortGroupRef of the originating SortGroupClause, if any,
+  * or zero if not.	(It should never be zero if the expression is volatile!)
+  *
+  * This can be used safely both before and after EquivalenceClass merging;
+  * since it never causes merging it does not invalidate any existing ECs
+  * or PathKeys.
+  *
+  * Note: opfamilies must be chosen consistently with the way
+  * process_equivalence() would do; that is, generated from a mergejoinable
+  * equality operator.  Else we might fail to detect valid equivalences,
+  * generating poor (but not incorrect) plans.
+  */
+ EquivalenceClass *
+ get_eclass_for_sort_expr(PlannerInfo *root,
+ 						 Expr *expr,
+ 						 Oid expr_datatype,
+ 						 List *opfamilies,
+ 						 Index sortref)
+ {
+ 	EquivalenceClass *newec;
+ 	ListCell   *lc1;
+ 	MemoryContext oldcontext;
+ 
+ 	if (root->eq_classes_hash == NULL &&
+ 		list_length(root->eq_classes) > 32)
+ 		build_eq_classes_hash(root);
+ 
+ 	if (root->eq_classes_hash == NULL)
+ 	{
+ 		/*
+ 		 * Scan through the existing EquivalenceClasses for a match
+ 		 */
+ 		foreach(lc1, root->eq_classes)
+ 		{
+ 			EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ 			ListCell   *lc2;
+ 
+ 			/*
+ 			 * Never match to a volatile EC, except when we are looking at another
+ 			 * reference to the same volatile SortGroupClause.
+ 			 */
+ 			if (cur_ec->ec_has_volatile &&
+ 				(sortref == 0 || sortref != cur_ec->ec_sortref))
+ 				continue;
+ 
+ 			if (!equal(opfamilies, cur_ec->ec_opfamilies))
+ 				continue;
+ 
+ 			foreach(lc2, cur_ec->ec_members)
+ 			{
+ 				EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
+ 
+ 				/*
+ 				 * If below an outer join, don't match constants: they're not as
+ 				 * constant as they look.
+ 				 */
+ 				if (cur_ec->ec_below_outer_join &&
+ 					cur_em->em_is_const)
+ 					continue;
+ 
+ 				if (expr_datatype == cur_em->em_datatype &&
+ 					equal(expr, cur_em->em_expr))
+ 					return cur_ec;	/* Match! */
+ 			}
+ 		}
+ 
+ 		/*
+ 		 * No match, so build a new single-member EC
+ 		 */
+ 		newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref);
+ 
+ 		oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+ 		root->eq_classes = lappend(root->eq_classes, newec);
+ 		MemoryContextSwitchTo(oldcontext);
+ 
+ 		return newec;
+ 	}
+ 	else
+ 	{
+ 		EquivalenceClass *ec_found;
+ 		bool	found;
+ 		uint32	hashval;
+ 
+ 		/*
+ 		 * Build the new single-member EC to match against in hash_search()
+ 		 */
+ 		newec = build_new_ec(root, expr, expr_datatype, opfamilies, sortref);
+ 
+ 		hashval = bms_hash_value(newec->ec_relids);
+ 
+ 		ec_found = hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_FIND, &found);
+ 
+ 		if (found)
+ 		{
+ 			list_free(newec->ec_opfamilies);
+ 			list_free_deep(newec->ec_members);
+ 			bms_free(newec->ec_relids);
+ 			pfree(newec);
+ 			return ec_found;
+ 		}
+ 
+ 		oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+ 
+ 		root->eq_classes = lappend(root->eq_classes, newec);
+ 
+ 		hash_search_with_hash_value(root->eq_classes_hash, newec, hashval, HASH_ENTER, &found);
+ 
+ 		Assert(!found);
+ 
+ 		MemoryContextSwitchTo(oldcontext);
+ 
+ 		return newec;
+ 	}
+ }
+ 
  
  /*
   * generate_base_implied_equalities
diff -dcrpN postgresql.orig/src/backend/optimizer/path/pathkeys.c postgresql.1/src/backend/optimizer/path/pathkeys.c
*** postgresql.orig/src/backend/optimizer/path/pathkeys.c	2010-09-21 13:49:57.000000000 +0200
--- postgresql.1/src/backend/optimizer/path/pathkeys.c	2010-10-28 11:19:35.000000000 +0200
***************
*** 17,22 ****
--- 17,23 ----
   */
  #include "postgres.h"
  
+ #include "access/hash.h"
  #include "access/skey.h"
  #include "catalog/pg_type.h"
  #include "nodes/makefuncs.h"
***************
*** 27,32 ****
--- 28,34 ----
  #include "optimizer/paths.h"
  #include "optimizer/tlist.h"
  #include "parser/parsetree.h"
+ #include "utils/hsearch.h"
  #include "utils/lsyscache.h"
  
  
*************** makePathKey(EquivalenceClass *eclass, Oi
*** 72,77 ****
--- 74,146 ----
  }
  
  /*
+  * pk_hash
+  *		hashtable hash function for PlannerInfo.canon_pathkeys_hash
+  */
+ static uint32
+ pk_hash(const void *key, Size keysize)
+ {
+ 	PathKey	   *pk = (PathKey *) key;
+ 	intptr_t	ptr = (intptr_t) pk->pk_eclass;
+ 
+ 	return DatumGetUInt32(hash_uint32((uint32)ptr));
+ }
+ 
+ /*
+  * pk_match
+  *		hashtable match function for PlannerInfo.canon_pathkeys_hash
+  */
+ static int
+ pk_match(const void *key1, const void *key2, Size keysize)
+ {
+ 	PathKey	   *pk1 = (PathKey *)key1;
+ 	PathKey	   *pk2 = (PathKey *)key2;
+ 
+ 	if (pk1->pk_eclass == pk2->pk_eclass &&
+ 		pk1->pk_opfamily == pk2->pk_opfamily &&
+ 		pk1->pk_strategy == pk2->pk_strategy &&
+ 		pk1->pk_nulls_first == pk2->pk_nulls_first)
+ 		return 0;
+ 	return 1;
+ }
+ 
+ /*
+  * build_canonical_pathkey_hash
+  *
+  * Build PlannerInfo.canon_pathkeys_hash from canon_pathkeys list.
+  */
+ static void
+ build_canonical_pathkey_hash(PlannerInfo *root)
+ {
+ 	MemoryContext	oldcontext;
+ 	HASHCTL		info;
+ 
+ 	ListCell   *lc;
+ 
+ 	info.hash = pk_hash;
+ 	info.match = pk_match;
+ 	info.hcxt = root->planner_cxt;
+ 	info.keysize = sizeof(EquivalenceClass *);
+ 	info.entrysize = sizeof(PathKey);
+ 
+ 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+ 
+ 	root->canon_pathkeys_hash = hash_create("canon_pathkeys", 2048, &info,
+ 							HASH_FUNCTION | HASH_ELEM | HASH_COMPARE | HASH_CONTEXT);
+ 
+ 	foreach(lc, root->canon_pathkeys)
+ 	{
+ 		PathKey *pk = lfirst(lc);
+ 		bool	found;
+ 
+ 		hash_search_with_hash_value(root->canon_pathkeys_hash, pk,
+ 							DatumGetUInt32(hash_any((const unsigned char *) pk, sizeof(PathKey))),
+ 							HASH_ENTER, &found);
+ 		Assert(!found);
+ 	}
+ }
+ 
+ /*
   * make_canonical_pathkey
   *	  Given the parameters for a PathKey, find any pre-existing matching
   *	  pathkey in the query's list of "canonical" pathkeys.  Make a new
*************** make_canonical_pathkey(PlannerInfo *root
*** 85,91 ****
  					   EquivalenceClass *eclass, Oid opfamily,
  					   int strategy, bool nulls_first)
  {
! 	PathKey    *pk;
  	ListCell   *lc;
  	MemoryContext oldcontext;
  
--- 154,160 ----
  					   EquivalenceClass *eclass, Oid opfamily,
  					   int strategy, bool nulls_first)
  {
! 	PathKey    *pk, *pk_found;
  	ListCell   *lc;
  	MemoryContext oldcontext;
  
*************** make_canonical_pathkey(PlannerInfo *root
*** 93,118 ****
  	while (eclass->ec_merged)
  		eclass = eclass->ec_merged;
  
! 	foreach(lc, root->canon_pathkeys)
  	{
! 		pk = (PathKey *) lfirst(lc);
! 		if (eclass == pk->pk_eclass &&
! 			opfamily == pk->pk_opfamily &&
! 			strategy == pk->pk_strategy &&
! 			nulls_first == pk->pk_nulls_first)
! 			return pk;
  	}
  
! 	/*
! 	 * Be sure canonical pathkeys are allocated in the main planning context.
! 	 * Not an issue in normal planning, but it is for GEQO.
! 	 */
! 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
  
! 	pk = makePathKey(eclass, opfamily, strategy, nulls_first);
! 	root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
  
! 	MemoryContextSwitchTo(oldcontext);
  
  	return pk;
  }
--- 162,225 ----
  	while (eclass->ec_merged)
  		eclass = eclass->ec_merged;
  
! 	if (root->canon_pathkeys_hash == NULL &&
! 		list_length(root->canon_pathkeys) > 32)
! 		build_canonical_pathkey_hash(root);
! 
! 	if (root->canon_pathkeys_hash == NULL)
  	{
! 		foreach(lc, root->canon_pathkeys)
! 		{
! 			pk = (PathKey *) lfirst(lc);
! 			if (eclass == pk->pk_eclass &&
! 				opfamily == pk->pk_opfamily &&
! 				strategy == pk->pk_strategy &&
! 				nulls_first == pk->pk_nulls_first)
! 				return pk;
! 		}
! 
! 		/*
! 		 * Be sure canonical pathkeys are allocated in the main planning context.
! 		 * Not an issue in normal planning, but it is for GEQO.
! 		 */
! 		oldcontext = MemoryContextSwitchTo(root->planner_cxt);
! 
! 		pk = makePathKey(eclass, opfamily, strategy, nulls_first);
! 		root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
! 
! 		MemoryContextSwitchTo(oldcontext);
  	}
+ 	else
+ 	{
+ 		uint32	hashval;
+ 		bool	found;
  
! 		/*
! 		 * Be sure canonical pathkeys are allocated in the main planning context.
! 		 * Not an issue in normal planning, but it is for GEQO.
! 		 */
! 		oldcontext = MemoryContextSwitchTo(root->planner_cxt);
  
! 		pk = makePathKey(eclass, opfamily, strategy, nulls_first);
  
! 		hashval = DatumGetInt32(hash_any((const unsigned char *) pk, sizeof(PathKey)));
! 
! 
! 		pk_found = hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_FIND, &found);
! 
! 		if (found)
! 		{
! 			pfree(pk);
! 			pk = pk_found;
! 		}
! 		else
! 		{
! 			root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
! 			hash_search_with_hash_value(root->canon_pathkeys_hash, pk, hashval, HASH_ENTER, &found);
! 		}
! 
! 		MemoryContextSwitchTo(oldcontext);
! 	}
  
  	return pk;
  }
diff -dcrpN postgresql.orig/src/backend/optimizer/plan/planmain.c postgresql.1/src/backend/optimizer/plan/planmain.c
*** postgresql.orig/src/backend/optimizer/plan/planmain.c	2010-10-08 11:04:23.000000000 +0200
--- postgresql.1/src/backend/optimizer/plan/planmain.c	2010-10-28 09:35:37.000000000 +0200
***************
*** 27,32 ****
--- 27,33 ----
  #include "optimizer/placeholder.h"
  #include "optimizer/planmain.h"
  #include "optimizer/tlist.h"
+ #include "utils/hsearch.h"
  #include "utils/selfuncs.h"
  
  
*************** query_planner(PlannerInfo *root, List *t
*** 118,123 ****
--- 119,129 ----
  		 * something like "SELECT 2+2 ORDER BY 1".
  		 */
  		root->canon_pathkeys = NIL;
+ 		if (root->canon_pathkeys_hash)
+ 		{
+ 			hash_destroy(root->canon_pathkeys_hash);
+ 			root->canon_pathkeys_hash = NULL;
+ 		}
  		root->query_pathkeys = canonicalize_pathkeys(root,
  													 root->query_pathkeys);
  		root->group_pathkeys = canonicalize_pathkeys(root,
*************** query_planner(PlannerInfo *root, List *t
*** 146,151 ****
--- 152,162 ----
  	root->join_rel_level = NULL;
  	root->join_cur_level = 0;
  	root->canon_pathkeys = NIL;
+ 	if (root->canon_pathkeys_hash)
+ 	{
+ 		hash_destroy(root->canon_pathkeys_hash);
+ 		root->canon_pathkeys_hash = NULL;
+ 	}
  	root->left_join_clauses = NIL;
  	root->right_join_clauses = NIL;
  	root->full_join_clauses = NIL;
diff -dcrpN postgresql.orig/src/include/nodes/relation.h postgresql.1/src/include/nodes/relation.h
*** postgresql.orig/src/include/nodes/relation.h	2010-10-15 10:31:47.000000000 +0200
--- postgresql.1/src/include/nodes/relation.h	2010-10-28 11:26:02.000000000 +0200
*************** typedef struct PlannerInfo
*** 159,166 ****
--- 159,168 ----
  	List	   *cte_plan_ids;	/* per-CTE-item list of subplan IDs */
  
  	List	   *eq_classes;		/* list of active EquivalenceClasses */
+ 	struct HTAB *eq_classes_hash;	/* optional hashtable for equivalence classes */
  
  	List	   *canon_pathkeys; /* list of "canonical" PathKeys */
+ 	struct HTAB *canon_pathkeys_hash;	/* optional hashtable for "canonical" PathKeys */
  
  	List	   *left_join_clauses;		/* list of RestrictInfos for
  										 * mergejoinable outer join clauses
Binary files postgresql.orig/src/timezone/gmon.out and postgresql.1/src/timezone/gmon.out differ
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to