On Sun, Aug 16, 2009 at 5:31 PM, Robert Haas<robertmh...@gmail.com> wrote:
> It seems that the needed checks are very similar to the ones that we
> already implement when setting restrictinfo->mergeopfamilies.  That is
> filled in by get_mergejoin_opfamilies(), which checks for btree
> opfamilies where the strategy number is BTEqualStrategyNumber.  This
> might cease to be the correct check in the (not-too-distant?) future
> if we end up implementing other kinds of unique indices, but right now
> btrees are all there is.
>
> One possibility would be to have relation_is_distinct_for() call
> get_mergejoin_opfamilies() for each operator; then for each index we
> can check whether the opfamily of the relevant index column is in the
> returned list.  This seems a bit wasteful, though, since I believe
> that relation_is_distinct_for() would be called from joinpath.c, which
> has access to restrictinfo->mergeopfamilies already.
>
> I'm wondering whether it would make more sense to modify the proposed
> API for relation_is_distinct_for() in some way so that we don't lose
> this information.

Here is an attempt at the latter approach.  This doesn't actually
remove the join yet; it just checks whether the join can be removed.
I haven't tested it extensively yet, but am hoping for some feedback
on the basic approach.

...Robert
*** a/src/backend/optimizer/path/costsize.c
--- b/src/backend/optimizer/path/costsize.c
***************
*** 109,114 **** bool		enable_hashagg = true;
--- 109,115 ----
  bool		enable_nestloop = true;
  bool		enable_mergejoin = true;
  bool		enable_hashjoin = true;
+ bool		enable_joinremoval = true;
  
  typedef struct
  {
*** a/src/backend/optimizer/path/joinpath.c
--- b/src/backend/optimizer/path/joinpath.c
***************
*** 16,26 ****
--- 16,39 ----
  
  #include <math.h>
  
+ #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  
  
+ typedef struct path_operator_clause {
+ 	List   *mergeclause_list;
+ 	List   *hashclause_list;
+ 	List   *joinremovalclause_list;
+ 	List   *joinremovalvarattno_list;
+ } path_operator_clause;
+ 
+ static bool join_is_removable(RelOptInfo *joinrel,
+ 				  RelOptInfo *outerrel,
+ 				  RelOptInfo *innerrel,
+ 				  path_operator_clause *poc,
+ 				  JoinType jointype);
  static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
  					 RelOptInfo *outerrel, RelOptInfo *innerrel,
  					 List *restrictlist, List *mergeclause_list,
***************
*** 31,47 **** static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
  					 JoinType jointype, SpecialJoinInfo *sjinfo);
  static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
  					 RelOptInfo *outerrel, RelOptInfo *innerrel,
! 					 List *restrictlist,
  					 JoinType jointype, SpecialJoinInfo *sjinfo);
  static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
  						 RelOptInfo *outer_rel, JoinType jointype);
! static List *select_mergejoin_clauses(PlannerInfo *root,
! 						 RelOptInfo *joinrel,
! 						 RelOptInfo *outerrel,
! 						 RelOptInfo *innerrel,
! 						 List *restrictlist,
! 						 JoinType jointype);
! 
  
  /*
   * add_paths_to_joinrel
--- 44,61 ----
  					 JoinType jointype, SpecialJoinInfo *sjinfo);
  static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
  					 RelOptInfo *outerrel, RelOptInfo *innerrel,
! 					 List *restrictlist, List *hashclauses,
  					 JoinType jointype, SpecialJoinInfo *sjinfo);
  static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
  						 RelOptInfo *outer_rel, JoinType jointype);
! static bool rel_attrs_needed_above_join(RelOptInfo *rel, RelOptInfo *joinrel);
! static void select_operator_clauses(PlannerInfo *root,
! 						RelOptInfo *joinrel,
! 						RelOptInfo *outerrel,
! 						RelOptInfo *innerrel,
! 						List *restrictlist,
! 						JoinType jointype,
! 						path_operator_clause *poc);
  
  /*
   * add_paths_to_joinrel
***************
*** 75,135 **** add_paths_to_joinrel(PlannerInfo *root,
  					 SpecialJoinInfo *sjinfo,
  					 List *restrictlist)
  {
! 	List	   *mergeclause_list = NIL;
  
  	/*
! 	 * Find potential mergejoin clauses.  We can skip this if we are not
! 	 * interested in doing a mergejoin.  However, mergejoin is currently our
! 	 * only way of implementing full outer joins, so override mergejoin
! 	 * disable if it's a full join.
  	 */
! 	if (enable_mergejoin || jointype == JOIN_FULL)
! 		mergeclause_list = select_mergejoin_clauses(root,
! 													joinrel,
! 													outerrel,
! 													innerrel,
! 													restrictlist,
! 													jointype);
  
  	/*
! 	 * 1. Consider mergejoin paths where both relations must be explicitly
  	 * sorted.
  	 */
  	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
! 						 restrictlist, mergeclause_list, jointype, sjinfo);
  
  	/*
! 	 * 2. Consider paths where the outer relation need not be explicitly
  	 * sorted. This includes both nestloops and mergejoins where the outer
  	 * path is already ordered.
  	 */
  	match_unsorted_outer(root, joinrel, outerrel, innerrel,
! 						 restrictlist, mergeclause_list, jointype, sjinfo);
! 
! #ifdef NOT_USED
! 
! 	/*
! 	 * 3. Consider paths where the inner relation need not be explicitly
! 	 * sorted.	This includes mergejoins only (nestloops were already built in
! 	 * match_unsorted_outer).
! 	 *
! 	 * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
! 	 * significant difference between the inner and outer side of a mergejoin,
! 	 * so match_unsorted_inner creates no paths that aren't equivalent to
! 	 * those made by match_unsorted_outer when add_paths_to_joinrel() is
! 	 * invoked with the two rels given in the other order.
! 	 */
! 	match_unsorted_inner(root, joinrel, outerrel, innerrel,
! 						 restrictlist, mergeclause_list, jointype, sjinfo);
! #endif
  
  	/*
  	 * 4. Consider paths where both outer and inner relations must be hashed
  	 * before being joined.
  	 */
! 	if (enable_hashjoin)
  		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
! 							 restrictlist, jointype, sjinfo);
  }
  
  /*
--- 89,166 ----
  					 SpecialJoinInfo *sjinfo,
  					 List *restrictlist)
  {
! 	path_operator_clause	poc;
! 
! 	/*
! 	 * Use operator classes to find potential mergejoin clauses, hash join
! 	 * clauses, and join removal clauses.
! 	 */
! 	memset(&poc, 0, sizeof(path_operator_clause));
! 	select_operator_clauses(root, joinrel, outerrel, innerrel,
! 							restrictlist, jointype, &poc);
  
  	/*
! 	 * 1. Consider join removal.  This is always the most efficient strategy,
! 	 * so if it works, there's no need to consider anything further.
  	 */
! 	if (poc.joinremovalclause_list != NIL
! 		&& join_is_removable(joinrel, outerrel, innerrel, &poc, jointype))
! 	{
! 		/* XXX add noop path */
! 		elog(DEBUG1, "join is removable");
! 		/* XXX return; */
! 	}
  
  	/*
! 	 * 2. Consider mergejoin paths where both relations must be explicitly
  	 * sorted.
  	 */
  	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
! 						 restrictlist, poc.mergeclause_list, jointype, sjinfo);
  
  	/*
! 	 * 3. Consider paths where the outer relation need not be explicitly
  	 * sorted. This includes both nestloops and mergejoins where the outer
  	 * path is already ordered.
  	 */
  	match_unsorted_outer(root, joinrel, outerrel, innerrel,
! 						 restrictlist, poc.mergeclause_list, jointype, sjinfo);
  
  	/*
  	 * 4. Consider paths where both outer and inner relations must be hashed
  	 * before being joined.
  	 */
! 	if (poc.hashclause_list != NIL)
  		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
! 							 restrictlist, poc.hashclause_list, jointype,
! 							 sjinfo);
! }
! 
! /*
!  * Test whether a join can be implemented by scanning the outer side and
!  * ignoring the inner side altogether.
!  */
! static bool
! join_is_removable(RelOptInfo *joinrel,
! 				  RelOptInfo *outerrel,
! 				  RelOptInfo *innerrel,
! 				  path_operator_clause *poc,
! 				  JoinType jointype)
! {
! 	/* Shouldn't get here unless we have a left join with suitable clauses. */
! 	Assert(jointype == JOIN_LEFT && poc->joinremovalclause_list != NIL);
! 
! 	/* We can't remove the join if it's providing needed attributes. */
! 	if (rel_attrs_needed_above_join(innerrel, joinrel))
! 		return false;
! 
! 	/* Nullable side of the left join must be suitably unique. */
! 	if (!relation_is_distinct_for(innerrel, poc->joinremovalvarattno_list,
! 								  poc->joinremovalclause_list))
! 		return false;
! 
! 	/* OK to remove it. */
! 	return true;
  }
  
  /*
***************
*** 743,748 **** match_unsorted_outer(PlannerInfo *root,
--- 774,780 ----
   * 'innerrel' is the inner join relation
   * 'restrictlist' contains all of the RestrictInfo nodes for restriction
   *		clauses that apply to this join
+  * 'hashclauses' contains all the available hashclauses
   * 'jointype' is the type of join to do
   * 'sjinfo' is extra info about the join for selectivity estimation
   */
***************
*** 752,876 **** hash_inner_and_outer(PlannerInfo *root,
  					 RelOptInfo *outerrel,
  					 RelOptInfo *innerrel,
  					 List *restrictlist,
  					 JoinType jointype,
  					 SpecialJoinInfo *sjinfo)
  {
- 	bool		isouterjoin;
- 	List	   *hashclauses;
- 	ListCell   *l;
- 
  	/*
! 	 * Hashjoin only supports inner, left, semi, and anti joins.
  	 */
! 	switch (jointype)
! 	{
! 		case JOIN_INNER:
! 		case JOIN_SEMI:
! 		case JOIN_UNIQUE_OUTER:
! 		case JOIN_UNIQUE_INNER:
! 			isouterjoin = false;
! 			break;
! 		case JOIN_LEFT:
! 		case JOIN_ANTI:
! 			isouterjoin = true;
! 			break;
! 		default:
! 			return;
! 	}
  
! 	/*
! 	 * We need to build only one hashpath for any given pair of outer and
! 	 * inner relations; all of the hashable clauses will be used as keys.
! 	 *
! 	 * Scan the join's restrictinfo list to find hashjoinable clauses that are
! 	 * usable with this pair of sub-relations.
! 	 */
! 	hashclauses = NIL;
! 	foreach(l, restrictlist)
  	{
! 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
! 
! 		if (!restrictinfo->can_join ||
! 			restrictinfo->hashjoinoperator == InvalidOid)
! 			continue;			/* not hashjoinable */
! 
! 		/*
! 		 * If processing an outer join, only use its own join clauses for
! 		 * hashing.  For inner joins we need not be so picky.
! 		 */
! 		if (isouterjoin && restrictinfo->is_pushed_down)
! 			continue;
! 
! 		/*
! 		 * Check if clause is usable with these input rels.
! 		 */
! 		if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
! 			bms_is_subset(restrictinfo->right_relids, innerrel->relids))
! 		{
! 			/* righthand side is inner */
! 		}
! 		else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
! 				 bms_is_subset(restrictinfo->right_relids, outerrel->relids))
! 		{
! 			/* lefthand side is inner */
! 		}
! 		else
! 			continue;			/* no good for these input relations */
! 
! 		hashclauses = lappend(hashclauses, restrictinfo);
  	}
! 
! 	/* If we found any usable hashclauses, make a path */
! 	if (hashclauses)
  	{
! 		/*
! 		 * We consider both the cheapest-total-cost and cheapest-startup-cost
! 		 * outer paths.  There's no need to consider any but the
! 		 * cheapest-total-cost inner path, however.
! 		 */
! 		Path	   *cheapest_startup_outer = outerrel->cheapest_startup_path;
! 		Path	   *cheapest_total_outer = outerrel->cheapest_total_path;
! 		Path	   *cheapest_total_inner = innerrel->cheapest_total_path;
! 
! 		/* Unique-ify if need be */
! 		if (jointype == JOIN_UNIQUE_OUTER)
! 		{
! 			cheapest_total_outer = (Path *)
! 				create_unique_path(root, outerrel,
! 								   cheapest_total_outer, sjinfo);
! 			Assert(cheapest_total_outer);
! 			cheapest_startup_outer = cheapest_total_outer;
! 			jointype = JOIN_INNER;
! 		}
! 		else if (jointype == JOIN_UNIQUE_INNER)
! 		{
! 			cheapest_total_inner = (Path *)
! 				create_unique_path(root, innerrel,
! 								   cheapest_total_inner, sjinfo);
! 			Assert(cheapest_total_inner);
! 			jointype = JOIN_INNER;
! 		}
  
  		add_path(joinrel, (Path *)
  				 create_hashjoin_path(root,
  									  joinrel,
  									  jointype,
  									  sjinfo,
! 									  cheapest_total_outer,
  									  cheapest_total_inner,
  									  restrictlist,
  									  hashclauses));
- 		if (cheapest_startup_outer != cheapest_total_outer)
- 			add_path(joinrel, (Path *)
- 					 create_hashjoin_path(root,
- 										  joinrel,
- 										  jointype,
- 										  sjinfo,
- 										  cheapest_startup_outer,
- 										  cheapest_total_inner,
- 										  restrictlist,
- 										  hashclauses));
- 	}
  }
  
  /*
--- 784,840 ----
  					 RelOptInfo *outerrel,
  					 RelOptInfo *innerrel,
  					 List *restrictlist,
+ 					 List *hashclauses,
  					 JoinType jointype,
  					 SpecialJoinInfo *sjinfo)
  {
  	/*
! 	 * We consider both the cheapest-total-cost and cheapest-startup-cost
! 	 * outer paths.  There's no need to consider any but the
! 	 * cheapest-total-cost inner path, however.
  	 */
! 	Path	   *cheapest_startup_outer = outerrel->cheapest_startup_path;
! 	Path	   *cheapest_total_outer = outerrel->cheapest_total_path;
! 	Path	   *cheapest_total_inner = innerrel->cheapest_total_path;
  
! 	/* Unique-ify if need be */
! 	if (jointype == JOIN_UNIQUE_OUTER)
  	{
! 		cheapest_total_outer = (Path *)
! 			create_unique_path(root, outerrel,
! 							   cheapest_total_outer, sjinfo);
! 		Assert(cheapest_total_outer);
! 		cheapest_startup_outer = cheapest_total_outer;
! 		jointype = JOIN_INNER;
  	}
! 	else if (jointype == JOIN_UNIQUE_INNER)
  	{
! 		cheapest_total_inner = (Path *)
! 			create_unique_path(root, innerrel,
! 							   cheapest_total_inner, sjinfo);
! 		Assert(cheapest_total_inner);
! 		jointype = JOIN_INNER;
! 	}
  
+ 	add_path(joinrel, (Path *)
+ 			 create_hashjoin_path(root,
+ 								  joinrel,
+ 								  jointype,
+ 								  sjinfo,
+ 								  cheapest_total_outer,
+ 								  cheapest_total_inner,
+ 								  restrictlist,
+ 								  hashclauses));
+ 	if (cheapest_startup_outer != cheapest_total_outer)
  		add_path(joinrel, (Path *)
  				 create_hashjoin_path(root,
  									  joinrel,
  									  jointype,
  									  sjinfo,
! 									  cheapest_startup_outer,
  									  cheapest_total_inner,
  									  restrictlist,
  									  hashclauses));
  }
  
  /*
***************
*** 943,973 **** best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
  }
  
  /*
!  * select_mergejoin_clauses
!  *	  Select mergejoin clauses that are usable for a particular join.
!  *	  Returns a list of RestrictInfo nodes for those clauses.
   *
   * We also mark each selected RestrictInfo to show which side is currently
   * being considered as outer.  These are transient markings that are only
   * good for the duration of the current add_paths_to_joinrel() call!
   *
   * We examine each restrictinfo clause known for the join to see
!  * if it is mergejoinable and involves vars from the two sub-relations
!  * currently of interest.
   */
! static List *
! select_mergejoin_clauses(PlannerInfo *root,
! 						 RelOptInfo *joinrel,
! 						 RelOptInfo *outerrel,
! 						 RelOptInfo *innerrel,
! 						 List *restrictlist,
! 						 JoinType jointype)
  {
- 	List	   *result_list = NIL;
  	bool		isouterjoin = IS_OUTER_JOIN(jointype);
  	bool		have_nonmergeable_joinclause = false;
  	ListCell   *l;
  
  	foreach(l, restrictlist)
  	{
  		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
--- 907,972 ----
  }
  
  /*
!  * rel_attrs_needed_above_join
!  *     Determine whether any attributes from rel are used outside of joinrel.
!  *
!  * As a micro-optimization, it seems better to start with max_attr and count
!  * down rather than starting with min_attr and counting up, on the theory that
!  * the system attributes are somewhat less likely to be what is wanted and
!  * should be tested last.
!  */
! static bool
! rel_attrs_needed_above_join(RelOptInfo *rel, RelOptInfo *joinrel)
! {
! 	int attroff;
! 	for (attroff = rel->max_attr - rel->min_attr; attroff >= 0; --attroff)
! 		if (!bms_is_subset(rel->attr_needed[attroff], joinrel->relids))
! 			return true;
! 	return false;
! }
! 
! /*
!  * select_operator_clauses
!  *	  Select operator clauses that are usable for merging, hashing, or
!  *	  removal of a particular join.  Populates path_operator_clause structure.
   *
   * We also mark each selected RestrictInfo to show which side is currently
   * being considered as outer.  These are transient markings that are only
   * good for the duration of the current add_paths_to_joinrel() call!
   *
   * We examine each restrictinfo clause known for the join to see
!  * if it is mergejoinable and/or hashjoinable and involves vars from the two
!  * sub-relations currently of interest.  We also test for claues that could
!  * possibly be useful for join removal.
!  *
!  * All results are returned through the path_operator_clause structure.
   */
! static void
! select_operator_clauses(PlannerInfo *root,
! 					    RelOptInfo *joinrel,
! 					    RelOptInfo *outerrel,
! 					    RelOptInfo *innerrel,
! 					    List *restrictlist,
! 					    JoinType jointype,
! 						path_operator_clause *poc)
  {
  	bool		isouterjoin = IS_OUTER_JOIN(jointype);
  	bool		have_nonmergeable_joinclause = false;
+ 	bool		joinremovalOK = enable_joinremoval;
  	ListCell   *l;
  
+ 	/*
+ 	 * Currently, we only know how to remove left joins to a baserel with
+ 	 * unique indices.  We can check most of these criteria pretty trivially
+ 	 * to avoid doing useless extra work.  But checking whether any of the
+ 	 * indices are unique would require iterating over the indexlist, so for
+ 	 * now we just make sure there are indices of some sort or other.  If none
+ 	 * of them are unique, join removal will still fail, just slightly later.
+ 	 */
+ 	if (jointype != JOIN_LEFT || innerrel->reloptkind != RELOPT_BASEREL
+ 		|| innerrel->rtekind != RTE_RELATION || innerrel->indexlist == NIL)
+ 		joinremovalOK = false;
+ 
  	foreach(l, restrictlist)
  	{
  		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
***************
*** 981,988 **** select_mergejoin_clauses(PlannerInfo *root,
  		if (isouterjoin && restrictinfo->is_pushed_down)
  			continue;
  
! 		if (!restrictinfo->can_join ||
! 			restrictinfo->mergeopfamilies == NIL)
  		{
  			have_nonmergeable_joinclause = true;
  			continue;			/* not mergejoinable */
--- 980,991 ----
  		if (isouterjoin && restrictinfo->is_pushed_down)
  			continue;
  
! 		/*
! 		 * If this restrictinfo is not a binary opclause with nonoverlapping
! 		 * sets of relids, it is useless for all purposes we care about here.
! 		 * See comments regarding can_join in include/nodes/relation.h
! 		 */
! 		if (!restrictinfo->can_join)
  		{
  			have_nonmergeable_joinclause = true;
  			continue;			/* not mergejoinable */
***************
*** 1012,1046 **** select_mergejoin_clauses(PlannerInfo *root,
  		}
  
  		/*
! 		 * Insist that each side have a non-redundant eclass.  This
! 		 * restriction is needed because various bits of the planner expect
! 		 * that each clause in a merge be associatable with some pathkey in a
! 		 * canonical pathkey list, but redundant eclasses can't appear in
! 		 * canonical sort orderings.  (XXX it might be worth relaxing this,
! 		 * but not enough time to address it for 8.3.)
! 		 *
! 		 * Note: it would be bad if this condition failed for an otherwise
! 		 * mergejoinable FULL JOIN clause, since that would result in
! 		 * undesirable planner failure.  I believe that is not possible
! 		 * however; a variable involved in a full join could only appear in
! 		 * below_outer_join eclasses, which aren't considered redundant.
  		 *
! 		 * This case *can* happen for left/right join clauses: the outer-side
! 		 * variable could be equated to a constant.  Because we will propagate
! 		 * that constant across the join clause, the loss of ability to do a
! 		 * mergejoin is not really all that big a deal, and so it's not clear
! 		 * that improving this is important.
  		 */
! 		cache_mergeclause_eclasses(root, restrictinfo);
! 
! 		if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
! 			EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
  		{
! 			have_nonmergeable_joinclause = true;
! 			continue;			/* can't handle redundant eclasses */
  		}
  
! 		result_list = lappend(result_list, restrictinfo);
  	}
  
  	/*
--- 1015,1089 ----
  		}
  
  		/*
! 		 * Mergejoin-specific checks.
  		 *
! 		 * At present, mergejoin is our only way of implementing FULL joins,
! 		 * so we ignore the enable_mergejoin flag if this is a FULL join.
  		 */
! 		if (enable_mergejoin || jointype == JOIN_FULL)
  		{
! 			if (restrictinfo->mergeopfamilies == NIL)
! 				have_nonmergeable_joinclause = true;
! 			else
! 			{
! 				/*
! 				 * Insist that each side have a non-redundant eclass.  This
! 				 * restriction is needed because various bits of the planner
! 				 * expect that each clause in a merge be associatable with some
! 				 * pathkey in a canonical pathkey list, but redundant eclasses
! 				 * can't appear in canonical sort orderings.  (XXX it might be
! 				 * worth relaxing this, but not enough time to address it for
! 				 * 8.3.)
! 				 *
! 				 * Note: it would be bad if this condition failed for an
! 				 * otherwise mergejoinable FULL JOIN clause, since that would
! 				 * result in undesirable planner failure.  I believe that is
! 				 * not possible however; a variable involved in a full join
! 				 * could only appear in below_outer_join eclasses, which aren't
! 				 * considered redundant.
! 				 *
! 				 * This case *can* happen for left/right join clauses: the
! 				 * outer-side variable could be equated to a constant.  Because
! 				 * we will propagate that constant across the join clause, the
! 				 * loss of ability to do a mergejoin is not really all that big
! 				 * a deal, and so it's not clear that improving this is
! 				 * important.
! 				 */
! 				cache_mergeclause_eclasses(root, restrictinfo);
! 
! 				if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
! 					EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
! 					have_nonmergeable_joinclause = true;
! 				else
! 					poc->mergeclause_list = lappend(poc->mergeclause_list,
! 													restrictinfo);
! 			}
  		}
  
! 		/*
! 		 * Hashjoin-specific checks.
! 		 */
! 		if (enable_hashjoin && restrictinfo->hashjoinoperator != InvalidOid
! 			&& jointype != JOIN_FULL && jointype != JOIN_RIGHT)
! 			poc->hashclause_list = lappend(poc->hashclause_list, restrictinfo);
! 
! 		/*
! 		 * Joinremoval-specific checks.
! 		 */
! 		if (joinremovalOK && restrictinfo->mergeopfamilies != NIL)
! 		{
! 			Expr *expr = (Expr *) restrictinfo->clause;
! 			Node *outernode = restrictinfo->outer_is_left ? get_leftop(expr)
! 				: get_rightop(expr);
! 			if (IsA(outernode, Var))
! 			{
! 				poc->joinremovalclause_list =
! 					lappend(poc->joinremovalclause_list, restrictinfo);
! 				poc->joinremovalvarattno_list =
! 					lappend_int(poc->joinremovalvarattno_list,
! 								((Var *) outernode)->varattno);
! 			}
! 		}
  	}
  
  	/*
***************
*** 1055,1061 **** select_mergejoin_clauses(PlannerInfo *root,
  		switch (jointype)
  		{
  			case JOIN_RIGHT:
! 				return NIL;		/* not mergejoinable */
  			case JOIN_FULL:
  				ereport(ERROR,
  						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
--- 1098,1105 ----
  		switch (jointype)
  		{
  			case JOIN_RIGHT:
! 				poc->mergeclause_list = NIL;		/* not mergejoinable */
! 				break;
  			case JOIN_FULL:
  				ereport(ERROR,
  						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
***************
*** 1066,1071 **** select_mergejoin_clauses(PlannerInfo *root,
  				break;
  		}
  	}
- 
- 	return result_list;
  }
--- 1110,1113 ----
*** a/src/backend/optimizer/util/pathnode.c
--- b/src/backend/optimizer/util/pathnode.c
***************
*** 1084,1089 **** translate_sub_tlist(List *tlist, int relid)
--- 1084,1091 ----
   * compatibility.  That looks at btree or hash opfamily membership, and so
   * should give trustworthy answers for all operators that we might need
   * to deal with here.)
+  *
+  * See also relation_is_distinct_for().
   */
  static bool
  query_is_distinct_for(Query *query, List *colnos, List *opids)
***************
*** 1214,1219 **** distinct_col_search(int colno, List *colnos, List *opids)
--- 1216,1287 ----
  	return InvalidOid;
  }
  
+ 
+ /*
+  * relation_is_distinct_for - does relation never contain duplicates over the
+  *		specified columns?
+  *
+  * colnos are the column numbers of interest, and restrictlist is a list (of
+  * equal length) of RestrictInfos referencing operator clauses.
+  */
+ bool
+ relation_is_distinct_for(RelOptInfo *rel, List *colnos, List *restrictlist)
+ {
+ 	ListCell *ic;
+ 
+ 	foreach (ic, rel->indexlist)
+ 	{
+ 		IndexOptInfo   *ind = (IndexOptInfo *) lfirst(ic);
+ 		int				c;
+ 
+ 		/*
+ 		 * If the index is not unique or if it's a partial index that doesn't
+ 		 * match the query, it's useless to us.  We are not equipped to make
+ 		 * use of expression indexes, so reject those quickly as well.
+ 		 */
+ 		if (!ind->unique || (ind->indpred != NIL && !ind->predOK)
+ 			|| ind->indexprs != NIL)
+ 			continue;
+ 
+ 		/* Check each column. O(n^2), but we expect the lists to be short. */
+ 		for (c = 0; c < ind->ncolumns; ++c)
+ 		{
+ 			ListCell   *lc1,
+ 					   *lc2;
+ 			RestrictInfo   *rinfo = NULL;
+ 
+ 			/* Find the RestrictInfo for this column. */
+ 			forboth(lc1, colnos, lc2, restrictlist)
+ 			{
+ 				if (ind->indexkeys[c] == lfirst_int(lc1))
+ 				{
+ 					rinfo = lfirst(lc2);
+ 					break;
+ 				}
+ 			}
+ 
+ 			/*
+ 			 * If a RestrictInfo was found, check whether the opfamily of the
+ 			 * index column is one of the opfamilies to which the opclause's
+ 			 * opearator belongs.
+ 			 */
+ 			if (rinfo != NULL
+ 				&& list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c]))
+ 				break;
+ 		}
+ 
+ 		/* Found them all? */
+ 		if (c == ind->ncolumns)
+ 			return true;
+ 	}
+ 
+ 	/*
+ 	 * Some day it would be nice to check for other methods of establishing
+ 	 * distinctness.
+ 	 */
+ 	return false;
+ }
+ 
  /*
   * create_subqueryscan_path
   *	  Creates a path corresponding to a sequential scan of a subquery,
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 663,668 **** static struct config_bool ConfigureNamesBool[] =
--- 663,676 ----
  		true, NULL, NULL
  	},
  	{
+ 		{"enable_joinremoval", PGC_USERSET, QUERY_TUNING_METHOD,
+ 			gettext_noop("Enables the planner's use of join removal."),
+ 			NULL
+ 		},
+ 		&enable_joinremoval,
+ 		true, NULL, NULL
+ 	},
+ 	{
  		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
  			gettext_noop("Enables genetic query optimization."),
  			gettext_noop("This algorithm attempts to do planning without "
*** a/src/include/optimizer/cost.h
--- b/src/include/optimizer/cost.h
***************
*** 59,64 **** extern bool enable_hashagg;
--- 59,65 ----
  extern bool enable_nestloop;
  extern bool enable_mergejoin;
  extern bool enable_hashjoin;
+ extern bool enable_joinremoval;
  extern int	constraint_exclusion;
  
  extern double clamp_row_est(double nrows);
*** a/src/include/optimizer/pathnode.h
--- b/src/include/optimizer/pathnode.h
***************
*** 87,92 **** extern HashPath *create_hashjoin_path(PlannerInfo *root,
--- 87,95 ----
  					 List *restrict_clauses,
  					 List *hashclauses);
  
+ bool relation_is_distinct_for(RelOptInfo *rel, List *colnos,
+ 						 List *restrictlist);
+ 
  /*
   * prototypes for relnode.c
   */
*** a/src/test/regress/expected/rangefuncs.out
--- b/src/test/regress/expected/rangefuncs.out
***************
*** 1,16 ****
  SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
!        name        | setting 
! -------------------+---------
!  enable_bitmapscan | on
!  enable_hashagg    | on
!  enable_hashjoin   | on
!  enable_indexscan  | on
!  enable_mergejoin  | on
!  enable_nestloop   | on
!  enable_seqscan    | on
!  enable_sort       | on
!  enable_tidscan    | on
! (9 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);
--- 1,17 ----
  SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
!         name        | setting 
! --------------------+---------
!  enable_bitmapscan  | on
!  enable_hashagg     | on
!  enable_hashjoin    | on
!  enable_indexscan   | on
!  enable_joinremoval | on
!  enable_mergejoin   | on
!  enable_nestloop    | on
!  enable_seqscan     | on
!  enable_sort        | on
!  enable_tidscan     | on
! (10 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to