[HACKERS] updated join removal patch

Robert Haas Sat, 05 Sep 2009 20:17:34 -0700

Here we go again.  Following Tom's advice to not insert crocks in
add_path(), I've instead introduced a noopjoin path type which ignores
its inner side.  This could possibly be simplified to just a "noop"
path that doesn't even include an outer side, but the way I've done it
here doesn't really cost anything and might allow debug pprints to
print more useful information.  If, in the future, we have some other
use for a noop path, we might want to reconsider, though.


Comments appreciated.  This still needs some doc changes, and maybe
some regression tests, but I am not 100% sure that the basic
implementation will meet with general approval as yet, so don't want
to put the cart before the horse.

Thanks,

...Robert

*** a/src/backend/nodes/outfuncs.c
--- b/src/backend/nodes/outfuncs.c
***************
*** 1453,1458 **** _outHashPath(StringInfo str, HashPath *node)
--- 1453,1466 ----
  }
  
  static void
+ _outNoopJoinPath(StringInfo str, NoopJoinPath *node)
+ {
+ 	WRITE_NODE_TYPE("NOOPJOINPATH");
+ 
+ 	_outJoinPathInfo(str, (JoinPath *) node);
+ }
+ 
+ static void
  _outPlannerGlobal(StringInfo str, PlannerGlobal *node)
  {
  	WRITE_NODE_TYPE("PLANNERGLOBAL");
***************
*** 2643,2648 **** _outNode(StringInfo str, void *obj)
--- 2651,2659 ----
  			case T_HashPath:
  				_outHashPath(str, obj);
  				break;
+ 			case T_NoopJoinPath:
+ 				_outNoopJoinPath(str, obj);
+ 				break;
  			case T_PlannerGlobal:
  				_outPlannerGlobal(str, obj);
  				break;
*** a/src/backend/optimizer/README
--- b/src/backend/optimizer/README
***************
*** 352,357 **** RelOptInfo      - a relation or joined relations
--- 352,358 ----
    MaterialPath  - a Material plan node
    UniquePath    - remove duplicate rows
    NestPath      - nested-loop joins
+   NoopJoinPath  - noop join; scan outer side only
    MergePath     - merge joins
    HashPath      - hash joins
  
***************
*** 377,383 **** cheapest-overall path and doing an explicit sort.
  
  When we are generating paths for a particular RelOptInfo, we discard a path
  if it is more expensive than another known path that has the same or better
! sort order.  We will never discard a path that is the only known way to
  achieve a given sort order (without an explicit sort, that is).  In this
  way, the next level up will have the maximum freedom to build mergejoins
  without sorting, since it can pick from any of the paths retained for its
--- 378,384 ----
  
  When we are generating paths for a particular RelOptInfo, we discard a path
  if it is more expensive than another known path that has the same or better
! sort order.  We never discard a path that is the only known way to
  achieve a given sort order (without an explicit sort, that is).  In this
  way, the next level up will have the maximum freedom to build mergejoins
  without sorting, since it can pick from any of the paths retained for its
*** a/src/backend/optimizer/path/costsize.c
--- b/src/backend/optimizer/path/costsize.c
***************
*** 109,114 **** bool		enable_hashagg = true;
--- 109,115 ----
  bool		enable_nestloop = true;
  bool		enable_mergejoin = true;
  bool		enable_hashjoin = true;
+ bool		enable_joinremoval = true;
  
  typedef struct
  {
***************
*** 2120,2125 **** cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
--- 2121,2141 ----
  	path->jpath.path.total_cost = startup_cost + run_cost;
  }
  
+ /*
+  * cost_noopjoin
+  *	  Determines and returns the cost of joining two relations by just
+  *	  scanning the outer one.
+  *
+  * 'path' is already filled in except for the cost fields
+  */
+ void
+ cost_noopjoin(NoopJoinPath *path)
+ {
+ 	Path	   *outer_path = path->outerjoinpath;
+ 
+ 	path->path.startup_cost = outer_path->startup_cost;
+ 	path->path.total_cost = outer_path->total_cost;
+ }
  
  /*
   * cost_subplan
*** a/src/backend/optimizer/path/joinpath.c
--- b/src/backend/optimizer/path/joinpath.c
***************
*** 16,26 ****
--- 16,39 ----
  
  #include <math.h>
  
+ #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  
  
+ typedef struct path_operator_clause {
+ 	List   *mergeclause_list;
+ 	List   *hashclause_list;
+ 	List   *joinremovalclause_list;
+ 	List   *joinremovalvarattno_list;
+ } path_operator_clause;
+ 
+ static bool join_is_removable(RelOptInfo *joinrel,
+ 				  RelOptInfo *outerrel,
+ 				  RelOptInfo *innerrel,
+ 				  path_operator_clause *poc,
+ 				  JoinType jointype);
  static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
  					 RelOptInfo *outerrel, RelOptInfo *innerrel,
  					 List *restrictlist, List *mergeclause_list,
***************
*** 31,47 **** static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
  					 JoinType jointype, SpecialJoinInfo *sjinfo);
  static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
  					 RelOptInfo *outerrel, RelOptInfo *innerrel,
! 					 List *restrictlist,
  					 JoinType jointype, SpecialJoinInfo *sjinfo);
  static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
  						 RelOptInfo *outer_rel, JoinType jointype);
! static List *select_mergejoin_clauses(PlannerInfo *root,
! 						 RelOptInfo *joinrel,
! 						 RelOptInfo *outerrel,
! 						 RelOptInfo *innerrel,
! 						 List *restrictlist,
! 						 JoinType jointype);
! 
  
  /*
   * add_paths_to_joinrel
--- 44,61 ----
  					 JoinType jointype, SpecialJoinInfo *sjinfo);
  static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
  					 RelOptInfo *outerrel, RelOptInfo *innerrel,
! 					 List *restrictlist, List *hashclauses,
  					 JoinType jointype, SpecialJoinInfo *sjinfo);
  static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
  						 RelOptInfo *outer_rel, JoinType jointype);
! static bool rel_attrs_needed_above_join(RelOptInfo *rel, RelOptInfo *joinrel);
! static void select_operator_clauses(PlannerInfo *root,
! 						RelOptInfo *joinrel,
! 						RelOptInfo *outerrel,
! 						RelOptInfo *innerrel,
! 						List *restrictlist,
! 						JoinType jointype,
! 						path_operator_clause *poc);
  
  /*
   * add_paths_to_joinrel
***************
*** 75,135 **** add_paths_to_joinrel(PlannerInfo *root,
  					 SpecialJoinInfo *sjinfo,
  					 List *restrictlist)
  {
! 	List	   *mergeclause_list = NIL;
  
  	/*
! 	 * Find potential mergejoin clauses.  We can skip this if we are not
! 	 * interested in doing a mergejoin.  However, mergejoin is currently our
! 	 * only way of implementing full outer joins, so override mergejoin
! 	 * disable if it's a full join.
  	 */
! 	if (enable_mergejoin || jointype == JOIN_FULL)
! 		mergeclause_list = select_mergejoin_clauses(root,
! 													joinrel,
! 													outerrel,
! 													innerrel,
! 													restrictlist,
! 													jointype);
  
  	/*
! 	 * 1. Consider mergejoin paths where both relations must be explicitly
  	 * sorted.
  	 */
  	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
! 						 restrictlist, mergeclause_list, jointype, sjinfo);
  
  	/*
! 	 * 2. Consider paths where the outer relation need not be explicitly
  	 * sorted. This includes both nestloops and mergejoins where the outer
  	 * path is already ordered.
  	 */
  	match_unsorted_outer(root, joinrel, outerrel, innerrel,
! 						 restrictlist, mergeclause_list, jointype, sjinfo);
! 
! #ifdef NOT_USED
! 
! 	/*
! 	 * 3. Consider paths where the inner relation need not be explicitly
! 	 * sorted.	This includes mergejoins only (nestloops were already built in
! 	 * match_unsorted_outer).
! 	 *
! 	 * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
! 	 * significant difference between the inner and outer side of a mergejoin,
! 	 * so match_unsorted_inner creates no paths that aren't equivalent to
! 	 * those made by match_unsorted_outer when add_paths_to_joinrel() is
! 	 * invoked with the two rels given in the other order.
! 	 */
! 	match_unsorted_inner(root, joinrel, outerrel, innerrel,
! 						 restrictlist, mergeclause_list, jointype, sjinfo);
! #endif
  
  	/*
  	 * 4. Consider paths where both outer and inner relations must be hashed
  	 * before being joined.
  	 */
! 	if (enable_hashjoin)
  		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
! 							 restrictlist, jointype, sjinfo);
  }
  
  /*
--- 89,182 ----
  					 SpecialJoinInfo *sjinfo,
  					 List *restrictlist)
  {
! 	path_operator_clause	poc;
! 
! 	/*
! 	 * Use operator classes to find potential mergejoin clauses, hash join
! 	 * clauses, and join removal clauses.
! 	 */
! 	memset(&poc, 0, sizeof(path_operator_clause));
! 	select_operator_clauses(root, joinrel, outerrel, innerrel,
! 							restrictlist, jointype, &poc);
  
  	/*
! 	 * 1. Consider join removal.  This is always the most efficient strategy,
! 	 * so if it works, there's no need to consider anything further.
  	 */
! 	if (poc.joinremovalclause_list != NIL
! 		&& join_is_removable(joinrel, outerrel, innerrel, &poc, jointype))
! 	{
! 		/*
! 		 * Need to preserve both the cheapest total path and the cheapeset
! 		 * startup path for the outer rel.  We're not actually going to scan
! 		 * the inner rel, so it doesn't matter which path we pick for that
! 		 * one.
! 		 */
! 		add_path(joinrel, (Path *)
! 				 create_noopjoin_path(root,
! 									  joinrel,
! 									  jointype,
! 									  outerrel->cheapest_total_path,
! 									  innerrel->cheapest_total_path));
! 		add_path(joinrel, (Path *)
! 				 create_noopjoin_path(root,
! 									  joinrel,
! 									  jointype,
! 									  outerrel->cheapest_startup_path,
! 									  innerrel->cheapest_total_path));
! 		return;
! 	}
  
  	/*
! 	 * 2. Consider mergejoin paths where both relations must be explicitly
  	 * sorted.
  	 */
  	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
! 						 restrictlist, poc.mergeclause_list, jointype, sjinfo);
  
  	/*
! 	 * 3. Consider paths where the outer relation need not be explicitly
  	 * sorted. This includes both nestloops and mergejoins where the outer
  	 * path is already ordered.
  	 */
  	match_unsorted_outer(root, joinrel, outerrel, innerrel,
! 						 restrictlist, poc.mergeclause_list, jointype, sjinfo);
  
  	/*
  	 * 4. Consider paths where both outer and inner relations must be hashed
  	 * before being joined.
  	 */
! 	if (poc.hashclause_list != NIL)
  		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
! 							 restrictlist, poc.hashclause_list, jointype,
! 							 sjinfo);
! }
! 
! /*
!  * Test whether a join can be implemented by scanning the outer side and
!  * ignoring the inner side altogether.
!  */
! static bool
! join_is_removable(RelOptInfo *joinrel,
! 				  RelOptInfo *outerrel,
! 				  RelOptInfo *innerrel,
! 				  path_operator_clause *poc,
! 				  JoinType jointype)
! {
! 	/* Shouldn't get here unless we have a left join with suitable clauses. */
! 	Assert(jointype == JOIN_LEFT && poc->joinremovalclause_list != NIL);
! 
! 	/* We can't remove the join if it's providing needed attributes. */
! 	if (rel_attrs_needed_above_join(innerrel, joinrel))
! 		return false;
! 
! 	/* Nullable side of the left join must be suitably unique. */
! 	if (!relation_is_distinct_for(innerrel, poc->joinremovalvarattno_list,
! 								  poc->joinremovalclause_list))
! 		return false;
! 
! 	/* OK to remove it. */
! 	return true;
  }
  
  /*
***************
*** 743,748 **** match_unsorted_outer(PlannerInfo *root,
--- 790,796 ----
   * 'innerrel' is the inner join relation
   * 'restrictlist' contains all of the RestrictInfo nodes for restriction
   *		clauses that apply to this join
+  * 'hashclauses' contains all the available hashclauses
   * 'jointype' is the type of join to do
   * 'sjinfo' is extra info about the join for selectivity estimation
   */
***************
*** 752,876 **** hash_inner_and_outer(PlannerInfo *root,
  					 RelOptInfo *outerrel,
  					 RelOptInfo *innerrel,
  					 List *restrictlist,
  					 JoinType jointype,
  					 SpecialJoinInfo *sjinfo)
  {
- 	bool		isouterjoin;
- 	List	   *hashclauses;
- 	ListCell   *l;
- 
  	/*
! 	 * Hashjoin only supports inner, left, semi, and anti joins.
  	 */
! 	switch (jointype)
! 	{
! 		case JOIN_INNER:
! 		case JOIN_SEMI:
! 		case JOIN_UNIQUE_OUTER:
! 		case JOIN_UNIQUE_INNER:
! 			isouterjoin = false;
! 			break;
! 		case JOIN_LEFT:
! 		case JOIN_ANTI:
! 			isouterjoin = true;
! 			break;
! 		default:
! 			return;
! 	}
  
! 	/*
! 	 * We need to build only one hashpath for any given pair of outer and
! 	 * inner relations; all of the hashable clauses will be used as keys.
! 	 *
! 	 * Scan the join's restrictinfo list to find hashjoinable clauses that are
! 	 * usable with this pair of sub-relations.
! 	 */
! 	hashclauses = NIL;
! 	foreach(l, restrictlist)
  	{
! 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
! 
! 		if (!restrictinfo->can_join ||
! 			restrictinfo->hashjoinoperator == InvalidOid)
! 			continue;			/* not hashjoinable */
! 
! 		/*
! 		 * If processing an outer join, only use its own join clauses for
! 		 * hashing.  For inner joins we need not be so picky.
! 		 */
! 		if (isouterjoin && restrictinfo->is_pushed_down)
! 			continue;
! 
! 		/*
! 		 * Check if clause is usable with these input rels.
! 		 */
! 		if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
! 			bms_is_subset(restrictinfo->right_relids, innerrel->relids))
! 		{
! 			/* righthand side is inner */
! 		}
! 		else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
! 				 bms_is_subset(restrictinfo->right_relids, outerrel->relids))
! 		{
! 			/* lefthand side is inner */
! 		}
! 		else
! 			continue;			/* no good for these input relations */
! 
! 		hashclauses = lappend(hashclauses, restrictinfo);
  	}
! 
! 	/* If we found any usable hashclauses, make a path */
! 	if (hashclauses)
  	{
! 		/*
! 		 * We consider both the cheapest-total-cost and cheapest-startup-cost
! 		 * outer paths.  There's no need to consider any but the
! 		 * cheapest-total-cost inner path, however.
! 		 */
! 		Path	   *cheapest_startup_outer = outerrel->cheapest_startup_path;
! 		Path	   *cheapest_total_outer = outerrel->cheapest_total_path;
! 		Path	   *cheapest_total_inner = innerrel->cheapest_total_path;
! 
! 		/* Unique-ify if need be */
! 		if (jointype == JOIN_UNIQUE_OUTER)
! 		{
! 			cheapest_total_outer = (Path *)
! 				create_unique_path(root, outerrel,
! 								   cheapest_total_outer, sjinfo);
! 			Assert(cheapest_total_outer);
! 			cheapest_startup_outer = cheapest_total_outer;
! 			jointype = JOIN_INNER;
! 		}
! 		else if (jointype == JOIN_UNIQUE_INNER)
! 		{
! 			cheapest_total_inner = (Path *)
! 				create_unique_path(root, innerrel,
! 								   cheapest_total_inner, sjinfo);
! 			Assert(cheapest_total_inner);
! 			jointype = JOIN_INNER;
! 		}
  
  		add_path(joinrel, (Path *)
  				 create_hashjoin_path(root,
  									  joinrel,
  									  jointype,
  									  sjinfo,
! 									  cheapest_total_outer,
  									  cheapest_total_inner,
  									  restrictlist,
  									  hashclauses));
- 		if (cheapest_startup_outer != cheapest_total_outer)
- 			add_path(joinrel, (Path *)
- 					 create_hashjoin_path(root,
- 										  joinrel,
- 										  jointype,
- 										  sjinfo,
- 										  cheapest_startup_outer,
- 										  cheapest_total_inner,
- 										  restrictlist,
- 										  hashclauses));
- 	}
  }
  
  /*
--- 800,856 ----
  					 RelOptInfo *outerrel,
  					 RelOptInfo *innerrel,
  					 List *restrictlist,
+ 					 List *hashclauses,
  					 JoinType jointype,
  					 SpecialJoinInfo *sjinfo)
  {
  	/*
! 	 * We consider both the cheapest-total-cost and cheapest-startup-cost
! 	 * outer paths.  There's no need to consider any but the
! 	 * cheapest-total-cost inner path, however.
  	 */
! 	Path	   *cheapest_startup_outer = outerrel->cheapest_startup_path;
! 	Path	   *cheapest_total_outer = outerrel->cheapest_total_path;
! 	Path	   *cheapest_total_inner = innerrel->cheapest_total_path;
  
! 	/* Unique-ify if need be */
! 	if (jointype == JOIN_UNIQUE_OUTER)
  	{
! 		cheapest_total_outer = (Path *)
! 			create_unique_path(root, outerrel,
! 							   cheapest_total_outer, sjinfo);
! 		Assert(cheapest_total_outer);
! 		cheapest_startup_outer = cheapest_total_outer;
! 		jointype = JOIN_INNER;
  	}
! 	else if (jointype == JOIN_UNIQUE_INNER)
  	{
! 		cheapest_total_inner = (Path *)
! 			create_unique_path(root, innerrel,
! 							   cheapest_total_inner, sjinfo);
! 		Assert(cheapest_total_inner);
! 		jointype = JOIN_INNER;
! 	}
  
+ 	add_path(joinrel, (Path *)
+ 			 create_hashjoin_path(root,
+ 								  joinrel,
+ 								  jointype,
+ 								  sjinfo,
+ 								  cheapest_total_outer,
+ 								  cheapest_total_inner,
+ 								  restrictlist,
+ 								  hashclauses));
+ 	if (cheapest_startup_outer != cheapest_total_outer)
  		add_path(joinrel, (Path *)
  				 create_hashjoin_path(root,
  									  joinrel,
  									  jointype,
  									  sjinfo,
! 									  cheapest_startup_outer,
  									  cheapest_total_inner,
  									  restrictlist,
  									  hashclauses));
  }
  
  /*
***************
*** 943,973 **** best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
  }
  
  /*
!  * select_mergejoin_clauses
!  *	  Select mergejoin clauses that are usable for a particular join.
!  *	  Returns a list of RestrictInfo nodes for those clauses.
   *
   * We also mark each selected RestrictInfo to show which side is currently
   * being considered as outer.  These are transient markings that are only
   * good for the duration of the current add_paths_to_joinrel() call!
   *
   * We examine each restrictinfo clause known for the join to see
!  * if it is mergejoinable and involves vars from the two sub-relations
!  * currently of interest.
   */
! static List *
! select_mergejoin_clauses(PlannerInfo *root,
! 						 RelOptInfo *joinrel,
! 						 RelOptInfo *outerrel,
! 						 RelOptInfo *innerrel,
! 						 List *restrictlist,
! 						 JoinType jointype)
  {
- 	List	   *result_list = NIL;
  	bool		isouterjoin = IS_OUTER_JOIN(jointype);
  	bool		have_nonmergeable_joinclause = false;
  	ListCell   *l;
  
  	foreach(l, restrictlist)
  	{
  		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
--- 923,988 ----
  }
  
  /*
!  * rel_attrs_needed_above_join
!  *     Determine whether any attributes from rel are used outside of joinrel.
!  *
!  * As a micro-optimization, it seems better to start with max_attr and count
!  * down rather than starting with min_attr and counting up, on the theory that
!  * the system attributes are somewhat less likely to be what is wanted and
!  * should be tested last.
!  */
! static bool
! rel_attrs_needed_above_join(RelOptInfo *rel, RelOptInfo *joinrel)
! {
! 	int attroff;
! 	for (attroff = rel->max_attr - rel->min_attr; attroff >= 0; --attroff)
! 		if (!bms_is_subset(rel->attr_needed[attroff], joinrel->relids))
! 			return true;
! 	return false;
! }
! 
! /*
!  * select_operator_clauses
!  *	  Select operator clauses that are usable for merging, hashing, or
!  *	  removal of a particular join.  Populates path_operator_clause structure.
   *
   * We also mark each selected RestrictInfo to show which side is currently
   * being considered as outer.  These are transient markings that are only
   * good for the duration of the current add_paths_to_joinrel() call!
   *
   * We examine each restrictinfo clause known for the join to see
!  * if it is mergejoinable and/or hashjoinable and involves vars from the two
!  * sub-relations currently of interest.  We also test for claues that could
!  * possibly be useful for join removal.
!  *
!  * All results are returned through the path_operator_clause structure.
   */
! static void
! select_operator_clauses(PlannerInfo *root,
! 					    RelOptInfo *joinrel,
! 					    RelOptInfo *outerrel,
! 					    RelOptInfo *innerrel,
! 					    List *restrictlist,
! 					    JoinType jointype,
! 						path_operator_clause *poc)
  {
  	bool		isouterjoin = IS_OUTER_JOIN(jointype);
  	bool		have_nonmergeable_joinclause = false;
+ 	bool		joinremovalOK = enable_joinremoval;
  	ListCell   *l;
  
+ 	/*
+ 	 * Currently, we only know how to remove left joins to a baserel with
+ 	 * unique indices.  We can check most of these criteria pretty trivially
+ 	 * to avoid doing useless extra work.  But checking whether any of the
+ 	 * indices are unique would require iterating over the indexlist, so for
+ 	 * now we just make sure there are indices of some sort or other.  If none
+ 	 * of them are unique, join removal will still fail, just slightly later.
+ 	 */
+ 	if (jointype != JOIN_LEFT || innerrel->reloptkind != RELOPT_BASEREL
+ 		|| innerrel->rtekind != RTE_RELATION || innerrel->indexlist == NIL)
+ 		joinremovalOK = false;
+ 
  	foreach(l, restrictlist)
  	{
  		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
***************
*** 981,988 **** select_mergejoin_clauses(PlannerInfo *root,
  		if (isouterjoin && restrictinfo->is_pushed_down)
  			continue;
  
! 		if (!restrictinfo->can_join ||
! 			restrictinfo->mergeopfamilies == NIL)
  		{
  			have_nonmergeable_joinclause = true;
  			continue;			/* not mergejoinable */
--- 996,1007 ----
  		if (isouterjoin && restrictinfo->is_pushed_down)
  			continue;
  
! 		/*
! 		 * If this restrictinfo is not a binary opclause with nonoverlapping
! 		 * sets of relids, it is useless for all purposes we care about here.
! 		 * See comments regarding can_join in include/nodes/relation.h
! 		 */
! 		if (!restrictinfo->can_join)
  		{
  			have_nonmergeable_joinclause = true;
  			continue;			/* not mergejoinable */
***************
*** 1012,1046 **** select_mergejoin_clauses(PlannerInfo *root,
  		}
  
  		/*
! 		 * Insist that each side have a non-redundant eclass.  This
! 		 * restriction is needed because various bits of the planner expect
! 		 * that each clause in a merge be associatable with some pathkey in a
! 		 * canonical pathkey list, but redundant eclasses can't appear in
! 		 * canonical sort orderings.  (XXX it might be worth relaxing this,
! 		 * but not enough time to address it for 8.3.)
  		 *
! 		 * Note: it would be bad if this condition failed for an otherwise
! 		 * mergejoinable FULL JOIN clause, since that would result in
! 		 * undesirable planner failure.  I believe that is not possible
! 		 * however; a variable involved in a full join could only appear in
! 		 * below_outer_join eclasses, which aren't considered redundant.
! 		 *
! 		 * This case *can* happen for left/right join clauses: the outer-side
! 		 * variable could be equated to a constant.  Because we will propagate
! 		 * that constant across the join clause, the loss of ability to do a
! 		 * mergejoin is not really all that big a deal, and so it's not clear
! 		 * that improving this is important.
  		 */
! 		cache_mergeclause_eclasses(root, restrictinfo);
! 
! 		if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
! 			EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
  		{
! 			have_nonmergeable_joinclause = true;
! 			continue;			/* can't handle redundant eclasses */
  		}
  
! 		result_list = lappend(result_list, restrictinfo);
  	}
  
  	/*
--- 1031,1105 ----
  		}
  
  		/*
! 		 * Mergejoin-specific checks.
  		 *
! 		 * At present, mergejoin is our only way of implementing FULL joins,
! 		 * so we ignore the enable_mergejoin flag if this is a FULL join.
  		 */
! 		if (enable_mergejoin || jointype == JOIN_FULL)
  		{
! 			if (restrictinfo->mergeopfamilies == NIL)
! 				have_nonmergeable_joinclause = true;
! 			else
! 			{
! 				/*
! 				 * Insist that each side have a non-redundant eclass.  This
! 				 * restriction is needed because various bits of the planner
! 				 * expect that each clause in a merge be associatable with some
! 				 * pathkey in a canonical pathkey list, but redundant eclasses
! 				 * can't appear in canonical sort orderings.  (XXX it might be
! 				 * worth relaxing this, but not enough time to address it for
! 				 * 8.3.)
! 				 *
! 				 * Note: it would be bad if this condition failed for an
! 				 * otherwise mergejoinable FULL JOIN clause, since that would
! 				 * result in undesirable planner failure.  I believe that is
! 				 * not possible however; a variable involved in a full join
! 				 * could only appear in below_outer_join eclasses, which aren't
! 				 * considered redundant.
! 				 *
! 				 * This case *can* happen for left/right join clauses: the
! 				 * outer-side variable could be equated to a constant.  Because
! 				 * we will propagate that constant across the join clause, the
! 				 * loss of ability to do a mergejoin is not really all that big
! 				 * a deal, and so it's not clear that improving this is
! 				 * important.
! 				 */
! 				cache_mergeclause_eclasses(root, restrictinfo);
! 
! 				if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
! 					EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
! 					have_nonmergeable_joinclause = true;
! 				else
! 					poc->mergeclause_list = lappend(poc->mergeclause_list,
! 													restrictinfo);
! 			}
  		}
  
! 		/*
! 		 * Hashjoin-specific checks.
! 		 */
! 		if (enable_hashjoin && restrictinfo->hashjoinoperator != InvalidOid
! 			&& jointype != JOIN_FULL && jointype != JOIN_RIGHT)
! 			poc->hashclause_list = lappend(poc->hashclause_list, restrictinfo);
! 
! 		/*
! 		 * Joinremoval-specific checks.
! 		 */
! 		if (joinremovalOK && restrictinfo->mergeopfamilies != NIL)
! 		{
! 			Expr *expr = (Expr *) restrictinfo->clause;
! 			Node *outernode = restrictinfo->outer_is_left ? get_leftop(expr)
! 				: get_rightop(expr);
! 			if (IsA(outernode, Var))
! 			{
! 				poc->joinremovalclause_list =
! 					lappend(poc->joinremovalclause_list, restrictinfo);
! 				poc->joinremovalvarattno_list =
! 					lappend_int(poc->joinremovalvarattno_list,
! 								((Var *) outernode)->varattno);
! 			}
! 		}
  	}
  
  	/*
***************
*** 1055,1061 **** select_mergejoin_clauses(PlannerInfo *root,
  		switch (jointype)
  		{
  			case JOIN_RIGHT:
! 				return NIL;		/* not mergejoinable */
  			case JOIN_FULL:
  				ereport(ERROR,
  						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
--- 1114,1121 ----
  		switch (jointype)
  		{
  			case JOIN_RIGHT:
! 				poc->mergeclause_list = NIL;		/* not mergejoinable */
! 				break;
  			case JOIN_FULL:
  				ereport(ERROR,
  						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
***************
*** 1066,1071 **** select_mergejoin_clauses(PlannerInfo *root,
  				break;
  		}
  	}
- 
- 	return result_list;
  }
--- 1126,1129 ----
*** a/src/backend/optimizer/plan/createplan.c
--- b/src/backend/optimizer/plan/createplan.c
***************
*** 170,175 **** create_plan(PlannerInfo *root, Path *best_path)
--- 170,178 ----
  			plan = create_join_plan(root,
  									(JoinPath *) best_path);
  			break;
+ 		case T_NoopJoinPath:
+ 			plan = create_plan(root, ((JoinPath *) best_path)->outerjoinpath);
+ 			break;
  		case T_Append:
  			plan = create_append_plan(root,
  									  (AppendPath *) best_path);
*** a/src/backend/optimizer/util/pathnode.c
--- b/src/backend/optimizer/util/pathnode.c
***************
*** 1084,1089 **** translate_sub_tlist(List *tlist, int relid)
--- 1084,1091 ----
   * compatibility.  That looks at btree or hash opfamily membership, and so
   * should give trustworthy answers for all operators that we might need
   * to deal with here.)
+  *
+  * See also relation_is_distinct_for().
   */
  static bool
  query_is_distinct_for(Query *query, List *colnos, List *opids)
***************
*** 1214,1219 **** distinct_col_search(int colno, List *colnos, List *opids)
--- 1216,1287 ----
  	return InvalidOid;
  }
  
+ 
+ /*
+  * relation_is_distinct_for - does relation never contain duplicates over the
+  *		specified columns?
+  *
+  * colnos are the column numbers of interest, and restrictlist is a list (of
+  * equal length) of RestrictInfos referencing operator clauses.
+  */
+ bool
+ relation_is_distinct_for(RelOptInfo *rel, List *colnos, List *restrictlist)
+ {
+ 	ListCell *ic;
+ 
+ 	foreach (ic, rel->indexlist)
+ 	{
+ 		IndexOptInfo   *ind = (IndexOptInfo *) lfirst(ic);
+ 		int				c;
+ 
+ 		/*
+ 		 * If the index is not unique or if it's a partial index that doesn't
+ 		 * match the query, it's useless to us.  We are not equipped to make
+ 		 * use of expression indexes, so reject those quickly as well.
+ 		 */
+ 		if (!ind->unique || (ind->indpred != NIL && !ind->predOK)
+ 			|| ind->indexprs != NIL)
+ 			continue;
+ 
+ 		/* Check each column. O(n^2), but we expect the lists to be short. */
+ 		for (c = 0; c < ind->ncolumns; ++c)
+ 		{
+ 			ListCell   *lc1,
+ 					   *lc2;
+ 			RestrictInfo   *rinfo = NULL;
+ 
+ 			/* Find the RestrictInfo for this column. */
+ 			forboth(lc1, colnos, lc2, restrictlist)
+ 			{
+ 				if (ind->indexkeys[c] == lfirst_int(lc1))
+ 				{
+ 					rinfo = lfirst(lc2);
+ 					break;
+ 				}
+ 			}
+ 
+ 			/*
+ 			 * If a RestrictInfo was found, check whether the opfamily of the
+ 			 * index column is one of the opfamilies to which the opclause's
+ 			 * opearator belongs.
+ 			 */
+ 			if (rinfo != NULL
+ 				&& list_member_oid(rinfo->mergeopfamilies, ind->opfamily[c]))
+ 				break;
+ 		}
+ 
+ 		/* Found them all? */
+ 		if (c == ind->ncolumns)
+ 			return true;
+ 	}
+ 
+ 	/*
+ 	 * Some day it would be nice to check for other methods of establishing
+ 	 * distinctness.
+ 	 */
+ 	return false;
+ }
+ 
  /*
   * create_subqueryscan_path
   *	  Creates a path corresponding to a sequential scan of a subquery,
***************
*** 1500,1502 **** create_hashjoin_path(PlannerInfo *root,
--- 1568,1608 ----
  
  	return pathnode;
  }
+ 
+ /*
+  * create_noopjoin_path
+  *	  Creates a pathnode corresponding to a noop join between two
+  *	  relations.  A noop join implements the join by scanning the outer
+  *	  relation and ignoring the inner relation.  This is appropriate when
+  *	  no outputs from the inner relation are needed to produce the final
+  *	  resultset, and the join won't add or remove any rows from the resultset.
+  *
+  * 'joinrel' is the join relation.
+  * 'jointype' is the type of join required
+  * 'outer_path' is the outer path
+  * 'inner_path' is the inner path
+  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
+  *
+  * Returns the resulting path node.
+  */
+ NoopJoinPath *
+ create_noopjoin_path(PlannerInfo *root,
+ 					 RelOptInfo *joinrel,
+ 					 JoinType jointype,
+ 					 Path *outer_path,
+ 					 Path *inner_path)
+ {
+ 	NoopJoinPath   *pathnode = makeNode(NoopJoinPath);
+ 
+ 	pathnode->path.pathtype = T_NoopJoinPath;
+ 	pathnode->path.parent = joinrel;
+ 	pathnode->jointype = jointype;
+ 	pathnode->outerjoinpath = outer_path;
+ 	pathnode->innerjoinpath = inner_path;
+ 	pathnode->joinrestrictinfo = NIL;
+ 	pathnode->path.pathkeys = outer_path->pathkeys;
+ 
+ 	cost_noopjoin(pathnode);
+ 
+ 	return pathnode;
+ }
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 663,668 **** static struct config_bool ConfigureNamesBool[] =
--- 663,676 ----
  		true, NULL, NULL
  	},
  	{
+ 		{"enable_joinremoval", PGC_USERSET, QUERY_TUNING_METHOD,
+ 			gettext_noop("Enables the planner's use of join removal."),
+ 			NULL
+ 		},
+ 		&enable_joinremoval,
+ 		true, NULL, NULL
+ 	},
+ 	{
  		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
  			gettext_noop("Enables genetic query optimization."),
  			gettext_noop("This algorithm attempts to do planning without "
*** a/src/include/nodes/nodes.h
--- b/src/include/nodes/nodes.h
***************
*** 206,211 **** typedef enum NodeTag
--- 206,212 ----
  	T_NestPath,
  	T_MergePath,
  	T_HashPath,
+ 	T_NoopJoinPath,
  	T_TidPath,
  	T_AppendPath,
  	T_ResultPath,
*** a/src/include/nodes/relation.h
--- b/src/include/nodes/relation.h
***************
*** 852,857 **** typedef struct HashPath
--- 852,864 ----
  } HashPath;
  
  /*
+  * A noop-join path needs no special fields.  Noop joins are inserted by the
+  * join removal logic, and stripped out when the path is converted to a plan.
+  */
+ 
+ typedef JoinPath NoopJoinPath;
+ 
+ /*
   * Restriction clause info.
   *
   * We create one of these for each AND sub-clause of a restriction condition
*** a/src/include/optimizer/cost.h
--- b/src/include/optimizer/cost.h
***************
*** 59,64 **** extern bool enable_hashagg;
--- 59,65 ----
  extern bool enable_nestloop;
  extern bool enable_mergejoin;
  extern bool enable_hashjoin;
+ extern bool enable_joinremoval;
  extern int	constraint_exclusion;
  
  extern double clamp_row_est(double nrows);
***************
*** 106,111 **** extern void cost_mergejoin(MergePath *path, PlannerInfo *root,
--- 107,113 ----
  			   SpecialJoinInfo *sjinfo);
  extern void cost_hashjoin(HashPath *path, PlannerInfo *root,
  			  SpecialJoinInfo *sjinfo);
+ extern void cost_noopjoin(NestPath *path);
  extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
  extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
  extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
*** a/src/include/optimizer/pathnode.h
--- b/src/include/optimizer/pathnode.h
***************
*** 87,92 **** extern HashPath *create_hashjoin_path(PlannerInfo *root,
--- 87,101 ----
  					 List *restrict_clauses,
  					 List *hashclauses);
  
+ extern NoopJoinPath *create_noopjoin_path(PlannerInfo *root,
+ 					 RelOptInfo *joinrel,
+ 					 JoinType jointype,
+ 					 Path *outer_path,
+ 					 Path *inner_path);
+ 
+ bool relation_is_distinct_for(RelOptInfo *rel, List *colnos,
+ 						 List *restrictlist);
+ 
  /*
   * prototypes for relnode.c
   */
*** a/src/test/regress/expected/rangefuncs.out
--- b/src/test/regress/expected/rangefuncs.out
***************
*** 1,16 ****
  SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
!        name        | setting 
! -------------------+---------
!  enable_bitmapscan | on
!  enable_hashagg    | on
!  enable_hashjoin   | on
!  enable_indexscan  | on
!  enable_mergejoin  | on
!  enable_nestloop   | on
!  enable_seqscan    | on
!  enable_sort       | on
!  enable_tidscan    | on
! (9 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);
--- 1,17 ----
  SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
!         name        | setting 
! --------------------+---------
!  enable_bitmapscan  | on
!  enable_hashagg     | on
!  enable_hashjoin    | on
!  enable_indexscan   | on
!  enable_joinremoval | on
!  enable_mergejoin   | on
!  enable_nestloop    | on
!  enable_seqscan     | on
!  enable_sort        | on
!  enable_tidscan     | on
! (10 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

[HACKERS] updated join removal patch

Reply via email to