Re: Removing unneeded self joins

Alexander Kuzmenkov Fri, 27 Jul 2018 05:27:02 -0700

Here is a current version of the patch, still rather experimental. Sincethe previous version, I fixed some bugs and added the possibility toremove a relation even when it is mentioned in target lists. I have torewrite all references to the removed relation in targetlists and theequivalence classes, so that they point to the remaining relation. Ichange RestrictInfos in place, and update attr_needed and reltarget ofthe remaining relation. I also update equivalence members, and deleteequivalence classes that become single-member.

I'm posting it a single file now, because all meaningful changes are inanalyzejoins.c anyway.


--
Alexander Kuzmenkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

>From 2f2be5d5efa0fcef38a0d6dd1225c73a377f88b8 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <a.kuzmen...@postgrespro.ru>
Date: Wed, 13 Jun 2018 20:56:03 +0300
Subject: [PATCH] Remove unique self joins.

Remove inner joins of a relation to itself on all columns of some unique
index. Such a join can be replaced with a scan that uses the combined
restrictions from both sides. We can build the required proofs of
uniqueness using the existing innerrel_is_unique machinery with slight
modifications.
---
 src/backend/nodes/equalfuncs.c            |   7 +-
 src/backend/optimizer/path/indxpath.c     |  23 +-
 src/backend/optimizer/path/joinpath.c     |   6 +-
 src/backend/optimizer/plan/analyzejoins.c | 819 +++++++++++++++++++++++++++---
 src/backend/optimizer/plan/planmain.c     |   7 +-
 src/backend/optimizer/util/pathnode.c     |   3 +-
 src/backend/optimizer/util/relnode.c      |  26 +-
 src/include/nodes/relation.h              |   6 +-
 src/include/optimizer/pathnode.h          |   5 +
 src/include/optimizer/paths.h             |  14 +-
 src/include/optimizer/planmain.h          |  10 +-
 src/test/regress/expected/join.out        |  84 ++-
 src/test/regress/sql/join.sql             |  34 ++
 13 files changed, 937 insertions(+), 107 deletions(-)

diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 6a971d0..fd02dc8 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -166,9 +166,10 @@ _equalVar(const Var *a, const Var *b)
 	COMPARE_SCALAR_FIELD(vartypmod);
 	COMPARE_SCALAR_FIELD(varcollid);
 	COMPARE_SCALAR_FIELD(varlevelsup);
-	COMPARE_SCALAR_FIELD(varnoold);
-	COMPARE_SCALAR_FIELD(varoattno);
-	COMPARE_LOCATION_FIELD(location);
+	/*
+	 * Parse location and old varno/varattno may differ even when
+	 * the variables are logically the same.
+	 */
 
 	return true;
 }
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index f295558..4daf3bf 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -2964,7 +2964,8 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel,
  * relation_has_unique_index_for
  *	  Determine whether the relation provably has at most one row satisfying
  *	  a set of equality conditions, because the conditions constrain all
- *	  columns of some unique index.
+ *	  columns of some unique index. If index_info is not null, it is set to
+ *	  point to a new UniqueIndexInfo containing the index and conditions.
  *
  * The conditions can be represented in either or both of two ways:
  * 1. A list of RestrictInfo nodes, where the caller has already determined
@@ -2985,7 +2986,8 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel,
 bool
 relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 							  List *restrictlist,
-							  List *exprlist, List *oprlist)
+							  List *exprlist, List *oprlist,
+							  UniqueIndexInfo **index_info)
 {
 	ListCell   *ic;
 
@@ -3041,6 +3043,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 	{
 		IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic);
 		int			c;
+		List *matched_restrictlist = NIL;
 
 		/*
 		 * If the index is not unique, or not immediately enforced, or if it's
@@ -3089,6 +3092,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 				if (match_index_to_operand(rexpr, c, ind))
 				{
 					matched = true; /* column is unique */
+					matched_restrictlist = lappend(matched_restrictlist, rinfo);
 					break;
 				}
 			}
@@ -3131,7 +3135,22 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 
 		/* Matched all columns of this index? */
 		if (c == ind->ncolumns)
+		{
+			if (index_info != NULL)
+			{
+				/* This may be called in GEQO memory context. */
+				MemoryContext oldContext = MemoryContextSwitchTo(root->planner_cxt);
+				*index_info = palloc(sizeof(UniqueIndexInfo));
+				(*index_info)->index = ind;
+				(*index_info)->clauses = list_copy(matched_restrictlist);
+				MemoryContextSwitchTo(oldContext);
+			}
+			if (matched_restrictlist)
+				pfree(matched_restrictlist);
 			return true;
+		}
+		if (matched_restrictlist)
+			pfree(matched_restrictlist);
 	}
 
 	return false;
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 642f951..2edcf22 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -176,7 +176,8 @@ add_paths_to_joinrel(PlannerInfo *root,
 													innerrel,
 													JOIN_INNER,
 													restrictlist,
-													false);
+													false,
+													NULL /*index_info*/);
 			break;
 		default:
 			extra.inner_unique = innerrel_is_unique(root,
@@ -185,7 +186,8 @@ add_paths_to_joinrel(PlannerInfo *root,
 													innerrel,
 													jointype,
 													restrictlist,
-													false);
+													false,
+													NULL /*index_info*/);
 			break;
 	}
 
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 0e73f9c..db12502 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -22,12 +22,15 @@
  */
 #include "postgres.h"
 
+#include "catalog/pg_class.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "optimizer/predtest.h"
+#include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "utils/lsyscache.h"
@@ -39,14 +42,15 @@ static void remove_rel_from_query(PlannerInfo *root, int relid,
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
 static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
 static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
-					List *clause_list);
+					List *clause_list, UniqueIndexInfo **info);
 static Oid	distinct_col_search(int colno, List *colnos, List *opids);
 static bool is_innerrel_unique_for(PlannerInfo *root,
 					   Relids joinrelids,
 					   Relids outerrelids,
 					   RelOptInfo *innerrel,
 					   JoinType jointype,
-					   List *restrictlist);
+					   List *restrictlist,
+					   UniqueIndexInfo **info);
 
 
 /*
@@ -58,7 +62,7 @@ static bool is_innerrel_unique_for(PlannerInfo *root,
  * data structures that have to be updated are accessible via "root".
  */
 List *
-remove_useless_joins(PlannerInfo *root, List *joinlist)
+remove_useless_left_joins(PlannerInfo *root, List *joinlist)
 {
 	ListCell   *lc;
 
@@ -162,7 +166,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	int			innerrelid;
 	RelOptInfo *innerrel;
 	Relids		joinrelids;
-	List	   *clause_list = NIL;
 	ListCell   *l;
 	int			attroff;
 
@@ -238,67 +241,24 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	}
 
 	/*
-	 * Search for mergejoinable clauses that constrain the inner rel against
-	 * either the outer rel or a pseudoconstant.  If an operator is
-	 * mergejoinable then it behaves like equality for some btree opclass, so
-	 * it's what we want.  The mergejoinability test also eliminates clauses
-	 * containing volatile functions, which we couldn't depend on.
+	 * Check for pushed-down clauses referencing the inner rel. If there is
+	 * such a clause then join removal has to be disallowed.  We have to
+	 * check this despite the previous attr_needed checks because of the
+	 * possibility of pushed-down clauses referencing the rel.
 	 */
 	foreach(l, innerrel->joininfo)
 	{
 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
-
-		/*
-		 * If it's not a join clause for this outer join, we can't use it.
-		 * Note that if the clause is pushed-down, then it is logically from
-		 * above the outer join, even if it references no other rels (it might
-		 * be from WHERE, for example).
-		 */
-		if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
-		{
-			/*
-			 * If such a clause actually references the inner rel then join
-			 * removal has to be disallowed.  We have to check this despite
-			 * the previous attr_needed checks because of the possibility of
-			 * pushed-down clauses referencing the rel.
-			 */
-			if (bms_is_member(innerrelid, restrictinfo->clause_relids))
+		if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)
+			&& bms_is_member(innerrel->relid, restrictinfo->clause_relids))
 				return false;
-			continue;			/* else, ignore; not useful here */
-		}
-
-		/* Ignore if it's not a mergejoinable clause */
-		if (!restrictinfo->can_join ||
-			restrictinfo->mergeopfamilies == NIL)
-			continue;			/* not mergejoinable */
-
-		/*
-		 * Check if clause has the form "outer op inner" or "inner op outer",
-		 * and if so mark which side is inner.
-		 */
-		if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
-									 innerrel->relids))
-			continue;			/* no good for these input relations */
-
-		/* OK, add to list */
-		clause_list = lappend(clause_list, restrictinfo);
 	}
 
-	/*
-	 * Now that we have the relevant equality join clauses, try to prove the
-	 * innerrel distinct.
-	 */
-	if (rel_is_distinct_for(root, innerrel, clause_list))
-		return true;
-
-	/*
-	 * Some day it would be nice to check for other methods of establishing
-	 * distinctness.
-	 */
-	return false;
+	return is_innerrel_unique_for(root, joinrelids, sjinfo->min_lefthand,
+								  innerrel, sjinfo->jointype, innerrel->joininfo,
+								  NULL /*unique_index*/);
 }
 
-
 /*
  * Remove the target relid from the planner's data structures, having
  * determined that there is no need to include it in the query.
@@ -568,7 +528,7 @@ reduce_unique_semijoins(PlannerInfo *root)
 		/* Test whether the innerrel is unique for those clauses. */
 		if (!innerrel_is_unique(root,
 								joinrelids, sjinfo->min_lefthand, innerrel,
-								JOIN_SEMI, restrictlist, true))
+								JOIN_SEMI, restrictlist, true, NULL /*index_info*/))
 			continue;
 
 		/* OK, remove the SpecialJoinInfo from the list. */
@@ -643,9 +603,13 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
  * Note that the passed-in clause_list may be destructively modified!  This
  * is OK for current uses, because the clause_list is built by the caller for
  * the sole purpose of passing to this function.
+ *
+ * If unique_index is not null, it is set to point to the index that guarantees
+ * uniqueness for a base relation.
  */
 static bool
-rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
+rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
+					UniqueIndexInfo **index_info)
 {
 	/*
 	 * We could skip a couple of tests here if we assume all callers checked
@@ -661,8 +625,8 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
 		 * relation_has_unique_index_for automatically adds any usable
 		 * restriction clauses for the rel, so we needn't do that here.
 		 */
-		if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
-			return true;
+		return relation_has_unique_index_for(root, rel, clause_list, NIL, NIL,
+											 index_info);
 	}
 	else if (rel->rtekind == RTE_SUBQUERY)
 	{
@@ -966,6 +930,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
  * heuristic about whether to cache negative answers; it should be "true"
  * if making an inquiry that is not part of the normal bottom-up join search
  * sequence.
+ *
+ * If index_info_out is not null, it is set to point to a new UniqueIndexInfo
+ * allocated in root memory context, that describes the index that guarantees
+ * uniqueness.
  */
 bool
 innerrel_is_unique(PlannerInfo *root,
@@ -974,12 +942,23 @@ innerrel_is_unique(PlannerInfo *root,
 				   RelOptInfo *innerrel,
 				   JoinType jointype,
 				   List *restrictlist,
-				   bool force_cache)
+				   bool force_cache,
+				   UniqueIndexInfo **index_info_out)
 {
 	MemoryContext old_context;
 	ListCell   *lc;
+	UniqueIndexInfo *index_info;
+
+	if (index_info_out)
+		*index_info_out = NULL;
 
-	/* Certainly can't prove uniqueness when there are no joinclauses */
+	/*
+	 * It is possible to prove uniqueness even in the absence of joinclauses,
+	 * just from baserestrictinfos alone. However, in these cases the inner
+	 * relation returns one row at most, so join removal won't give much
+	 * benefit. It seems better to save some planning time by ignoring these
+	 * cases.
+	 */
 	if (restrictlist == NIL)
 		return false;
 
@@ -999,10 +978,14 @@ innerrel_is_unique(PlannerInfo *root,
 	 */
 	foreach(lc, innerrel->unique_for_rels)
 	{
-		Relids		unique_for_rels = (Relids) lfirst(lc);
+		Relids		unique_for_rels = (Relids) linitial(lfirst(lc));
 
 		if (bms_is_subset(unique_for_rels, outerrelids))
+		{
+			if (index_info_out)
+				*index_info_out = lsecond(lfirst(lc));
 			return true;		/* Success! */
+		}
 	}
 
 	/*
@@ -1019,7 +1002,7 @@ innerrel_is_unique(PlannerInfo *root,
 
 	/* No cached information, so try to make the proof. */
 	if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
-							   jointype, restrictlist))
+							   jointype, restrictlist, &index_info))
 	{
 		/*
 		 * Cache the positive result for future probes, being sure to keep it
@@ -1033,9 +1016,12 @@ innerrel_is_unique(PlannerInfo *root,
 		 */
 		old_context = MemoryContextSwitchTo(root->planner_cxt);
 		innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
-											bms_copy(outerrelids));
+							list_make2(bms_copy(outerrelids), index_info));
 		MemoryContextSwitchTo(old_context);
 
+		if (index_info_out)
+			*index_info_out = index_info;
+
 		return true;			/* Success! */
 	}
 	else
@@ -1081,7 +1067,8 @@ is_innerrel_unique_for(PlannerInfo *root,
 					   Relids outerrelids,
 					   RelOptInfo *innerrel,
 					   JoinType jointype,
-					   List *restrictlist)
+					   List *restrictlist,
+					   UniqueIndexInfo **index_info)
 {
 	List	   *clause_list = NIL;
 	ListCell   *lc;
@@ -1123,5 +1110,703 @@ is_innerrel_unique_for(PlannerInfo *root,
 	}
 
 	/* Let rel_is_distinct_for() do the hard work */
-	return rel_is_distinct_for(root, innerrel, clause_list);
+	return rel_is_distinct_for(root, innerrel, clause_list, index_info);
+}
+
+typedef struct
+{
+	Index oldRelid;
+	Index newRelid;
+	bool found;
+} ChangeVarnoContext;
+
+static bool
+change_varno_walker(Node *node, ChangeVarnoContext *context)
+{
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var) && ((Var *) node)->varno == context->oldRelid)
+	{
+		((Var *) node)->varno = context->newRelid;
+		context->found = true;
+		return false;
+	}
+
+	return expression_tree_walker(node, change_varno_walker, context);
+}
+
+/*
+ * For all Vars in the expression that have varno = oldRelid, set
+ * varno = newRelid.
+ * Return whether any changes were made.
+ */
+static bool
+change_varno(Expr *expr, Index oldRelid, Index newRelid)
+{
+	ChangeVarnoContext context;
+	context.oldRelid = oldRelid;
+	context.newRelid = newRelid;
+	context.found = false;
+	change_varno_walker((Node *) expr, &context);
+	return context.found;
+}
+
+/*
+ * Substitute newId for oldId in relids.
+ */
+static void
+change_relid(Relids *relids, Index oldId, Index newId)
+{
+	if (bms_is_member(oldId, *relids))
+		*relids = bms_add_member(bms_del_member(*relids, oldId), newId);
+}
+
+/*
+ * Move EC members from the removed relation to the remaining one,
+ * removing duplicates.
+ */
+static void
+move_ec_members(EquivalenceClass *ec, Index toRemove, Index toKeep)
+{
+	ListCell *prev = NULL;
+	ListCell *cell = NULL;
+	ListCell *next = list_head(ec->ec_members);
+
+	while (next)
+	{
+		EquivalenceMember *em;
+
+		prev = cell;
+		cell = next;
+		next = lnext(next);
+
+		em = lfirst(cell);
+		if (change_varno(em->em_expr, toRemove, toKeep))
+		{
+			/*
+			 * If we transferred the equivalence member to another relation,
+			 * check that is not the same as the existing member, and if it
+			 * is, delete it.
+			 */
+			ListCell *lc;
+			foreach (lc, ec->ec_members)
+			{
+				if (lc == cell)
+					continue;
+
+				if (equal(((EquivalenceMember *) lfirst(lc))->em_expr,
+						  em->em_expr))
+				{
+					ec->ec_members = list_delete_cell(ec->ec_members, cell, prev);
+					cell = prev;
+					break;
+				}
+			}
+
+			if (lc == NULL)
+			{
+				/*
+				 * We get to keep this EquivalenceMember. Correct its relids.
+				 * nullable_relids should be empty, because self join removal
+				 * only works for inner joins.
+				 */
+				Assert(em->em_nullable_relids == NULL);
+				change_relid(&em->em_relids, toRemove, toKeep);
+			}
+		}
+	}
+}
+
+/*
+ * Remove EC sources referencing given relation.
+ */
+static void
+filter_ec_sources(List **sources, Index relToRemove)
+{
+	ListCell *prev = NULL;
+	ListCell *cell = NULL;
+	ListCell *next = list_head(*sources);
+
+	while (next)
+	{
+		RestrictInfo *rinfo;
+
+		prev = cell;
+		cell = next;
+		next = lnext(next);
+
+		rinfo = castNode(RestrictInfo, lfirst(cell));
+
+		if (bms_is_member(relToRemove, rinfo->required_relids))
+		{
+			*sources = list_delete_cell(*sources, cell, prev);
+			cell = prev;
+		}
+	}
+}
+
+/*
+ * Remove a relation after we have proven that it participates only in an
+ * unneeded unique self join.
+ *
+ * The joinclauses list is destructively changed.
+ */
+static void
+remove_self_join_rel(PlannerInfo *root, Relids joinrelids, List *joinclauses,
+					 RelOptInfo *toKeep, RelOptInfo *toRemove)
+{
+	ListCell *prev, *cell, *next;
+	List *toAppend;
+	int i;
+
+	/*
+	 * Transfer join and restriction clauses from the removed relation to the
+	 * remaining one. We change the Vars of the clause to point to the remaining
+	 * relation instead of the removed one. The clauses that require a subset of
+	 * joinrelids become restriction clauses of the remaining relation, and
+	 * others remain join clauses. We append them to baserestrictinfo and
+	 * joininfo respectively, trying not to introduce duplicates.
+	 *
+	 * We also have to process the 'joinclauses' list here, because it contains
+	 * EC-derived join clauses which must become filter clauses. It is not enough
+	 * to just correct the ECs, because the EC-derived restrictions are generated
+	 * before join removal (see generate_base_implied_equalities).
+	 */
+	toAppend = list_concat(joinclauses, toRemove->baserestrictinfo);
+	toAppend = list_concat(toAppend, toRemove->joininfo);
+
+	foreach(cell, toAppend)
+	{
+		RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell);
+		bool is_join_clause = !bms_is_subset(rinfo->required_relids, joinrelids);
+		List **target = is_join_clause ? &toKeep->joininfo : &toKeep->baserestrictinfo;
+
+		/* We can't have an EC-derived clause that joins to some third relation */
+		Assert( !(is_join_clause && rinfo->parent_ec != NULL) );
+
+		/*
+		 * Do not add multiple clauses derived from the same equivalence class.
+		 */
+		if (is_redundant_derived_clause(rinfo, *target))
+			continue;
+
+		/*
+		 * Replace the references to the removed relation with references to
+		 * the remaining one.
+		 */
+		change_varno(rinfo->clause, toRemove->relid, toKeep->relid);
+
+		/*
+		 * After we have replaced the Vars, check that the resulting clause is
+		 * not implied by the existing ones.
+		 */
+		if (!contain_mutable_functions((Node *) rinfo->clause)
+			&& predicate_implied_by(list_make1(rinfo->clause),
+									*target, false /*weak*/ ))
+			continue;			/* provably implied by r1 */
+
+		/*
+		 * If the clause has the form of "X=X", replace it with null test.
+		 */
+		if (rinfo->mergeopfamilies)
+		{
+			Assert(is_opclause(rinfo->clause));
+			Expr *leftOp = (Expr *) get_leftop(rinfo->clause);
+			Expr *rightOp = (Expr *) get_rightop(rinfo->clause);
+			if (leftOp != NULL && equal(leftOp, rightOp))
+			{
+				NullTest *test = makeNode(NullTest);
+				test->arg = leftOp;
+				test->nulltesttype = IS_NOT_NULL;
+				test->argisrow = false;
+				test->location = -1;
+				rinfo->clause = (Expr *) test;
+			}
+		}
+
+		/*
+		 * Finally, correct the relids of the rinfo, replace the clause with
+		 * the one we just constructed, and append it to the remaining relation.
+		 */
+		change_relid(&rinfo->required_relids, toRemove->relid, toKeep->relid);
+		change_relid(&rinfo->left_relids, toRemove->relid, toKeep->relid);
+		change_relid(&rinfo->right_relids, toRemove->relid, toKeep->relid);
+		change_relid(&rinfo->clause_relids, toRemove->relid, toKeep->relid);
+
+		*target = lappend(*target, rinfo);
+	}
+
+	/*
+	 * Transfer the targetlist.
+	 */
+	Assert(toRemove->reltarget->sortgrouprefs == 0);
+	foreach (cell, toRemove->reltarget->exprs)
+	{
+		Expr *node = lfirst(cell);
+		change_varno(node, toRemove->relid, toKeep->relid);
+		if (!list_member(toKeep->reltarget->exprs, node))
+			toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs,
+											   node);
+	}
+	for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
+	{
+		int attno = i - toKeep->min_attr;
+		toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
+													 toRemove->attr_needed[attno]);
+	}
+
+	/*
+	 * Likewise remove references from SpecialJoinInfo data structures.
+	 *
+	 * This is relevant in case the outer join we're deleting is nested inside
+	 * other outer joins: the upper joins' relid sets have to be adjusted. The
+	 * RHS of the target outer join will be made empty here, but that's OK
+	 * since caller will delete that SpecialJoinInfo entirely.
+	 */
+	foreach(cell, root->join_info_list)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(cell);
+
+		sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, toRemove->relid);
+		sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, toRemove->relid);
+		sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, toRemove->relid);
+		sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, toRemove->relid);
+	}
+
+
+	// !!!FIXME what about placeholders and upper-level tlists (e.g. for grouping)?
+	// The placeholders apparently work somehow due to the fact that they reference
+	// the same Var objects that we modify to point to the other relation.
+
+	/*
+	 * We must move the equivalence members that reference the removed relation
+	 * to the remaining one, being careful not to introduce duplicate members.
+	 * If a EC contains one member and is not used for sorting, it can be removed
+	 * altogether.
+	 */
+	prev = NULL;
+	cell = NULL;
+	next = list_head(root->eq_classes);
+	while (next)
+	{
+		EquivalenceClass *ec;
+
+		prev = cell;
+		cell = next;
+		next = lnext(next);
+
+		ec = lfirst(cell);
+
+		if (!bms_is_member(toRemove->relid, ec->ec_relids))
+		{
+			/*
+			 * This EC doesn't reference the removed relation,
+			 * nothing to be done for it.
+			 */
+			continue;
+		}
+
+		/*
+		 * Update the EC to reference the remaining relation instead
+		 * of the removed one.
+		 */
+		move_ec_members(ec, toRemove->relid, toKeep->relid);
+		change_relid(&ec->ec_relids, toRemove->relid, toKeep->relid);
+
+		if (ec->ec_sortref == 0 && list_length(ec->ec_members) <= 1)
+		{
+			/*
+			 * This EC is not used for sorting and contains one member.
+			 * It won't generate any join clauses and can be removed.
+			 */
+			root->eq_classes = list_delete_cell(root->eq_classes, cell, prev);
+			cell = prev;
+		}
+		else
+		{
+			/*
+			 * The updated EC should be kept.
+			 *
+			 * Some of its source and derived RestrictInfos point to the removed
+			 * relation. There is no straightforward way to determine whether
+			 * such a RestrictInfo should be removed or switched to the
+			 * remaining relation. We remove all of them, and will generate
+			 * the correct ones from equivalence members on demand.
+			 *
+			 * This is not important for the ECs that are only used for sorting,
+			 * but we process them too for the sake of consistency.
+			 */
+			filter_ec_sources(&ec->ec_sources, toRemove->relid);
+			filter_ec_sources(&ec->ec_derives, toRemove->relid);
+			ec->ec_relids = bms_del_member(ec->ec_relids, toRemove->relid);
+		}
+	}
+
+	/*
+	 * Mark the rel as "dead" to show it is no longer part of the join tree.
+	 * (Removing it from the baserel array altogether seems too risky.)
+	 */
+	toRemove->reloptkind = RELOPT_DEADREL;
+
+	/*
+	 * Remove references to the rel from other baserels' attr_needed arrays.
+	 */
+	for (i = 1; i < root->simple_rel_array_size; i++)
+	{
+		RelOptInfo *otherrel = root->simple_rel_array[i];
+		int			attroff;
+
+		/* no point in processing target rel itself */
+		if (i == toRemove->relid)
+			continue;
+
+		/* there may be empty slots corresponding to non-baserel RTEs */
+		if (otherrel == NULL)
+			continue;
+
+		Assert(otherrel->relid == i); /* sanity check on array */
+
+		for (attroff = otherrel->max_attr - otherrel->min_attr;
+			 attroff >= 0;
+			 attroff--)
+		{
+			otherrel->attr_needed[attroff] =
+				bms_del_member(otherrel->attr_needed[attroff], toRemove->relid);
+		}
+	}
+}
+
+/*
+ * Test whether the relations are joined on the same unique column.
+ */
+static bool
+is_unique_self_join(PlannerInfo *root, Relids joinrelids, RelOptInfo *outer,
+					RelOptInfo *inner, List *restrictlist)
+{
+	UniqueIndexInfo *outeridx = NULL;
+	UniqueIndexInfo *inneridx = NULL;
+	ListCell *outerCell, *innerCell;
+
+	innerrel_is_unique(root, joinrelids, inner->relids,
+						   outer, JOIN_INNER, restrictlist, true, &outeridx);
+	if (!outeridx)
+		return false;
+
+	innerrel_is_unique(root, joinrelids, outer->relids,
+						   inner, JOIN_INNER, restrictlist, true, &inneridx);
+	if (!inneridx)
+		return false;
+
+	/* We must have the same unique index for both relations. */
+	if (outeridx->index->indexoid != inneridx->index->indexoid)
+		return false;
+
+	/*
+	 * The index clauses must also be the same. The varnos are different, so
+	 * make a copy and replace all varnos of one relation with another, so
+	 * that we can compare them with equal().
+	 */
+	forboth(innerCell, inneridx->clauses, outerCell, outeridx->clauses)
+	{
+		Expr *innerExpr = copyObject(castNode(RestrictInfo, lfirst(innerCell))->clause);
+		Expr *outerExpr = copyObject(castNode(RestrictInfo, lfirst(outerCell))->clause);
+		change_varno(outerExpr, outer->relid, inner->relid);
+		change_varno(innerExpr, outer->relid, inner->relid);
+		if (!equal(outerExpr, innerExpr))
+		{
+			pfree(outerExpr);
+			pfree(innerExpr);
+			return false;
+		}
+		pfree(outerExpr);
+		pfree(innerExpr);
+	}
+
+	return true;
+}
+
+/*
+ * Scratch space for the unique self join removal code.
+ */
+typedef struct
+{
+	/* Temporary array for relation ids. */
+	Index *relids;
+
+	/*
+	 * Array of Relids, one for each relation, indexed by relation id.
+	 * Each element is a set of relation ids with which this relation
+	 * has a special join.
+	 */
+	Relids *special_join_rels;
+
+	/* Bitmapset for join relids that is used to avoid reallocation. */
+	Relids joinrelids;
+
+	/*
+	 * Top-level targetlist of the query. We have to update any references
+	 * it has to the relations we remove.
+	 */
+	 List *targetlist;
+} UsjScratch;
+
+/*
+ * Find and remove unique self joins in a group of base relations that have
+ * the same Oid.
+ *
+ * Returns IntList of the relids that were removed.
+ */
+static List *
+remove_self_joins_one_group(PlannerInfo *root, Index *relids, int n, UsjScratch *scratch)
+{
+	Relids joinrelids = scratch->joinrelids;
+	List *result = NIL;
+	int i, o;
+	ListCell *lc;
+
+	if (n < 2)
+		return NIL;
+
+	for (o = 0; o < n; o++)
+	{
+		RelOptInfo *outer = root->simple_rel_array[relids[o]];
+
+		for (i = o + 1; i < n; i++)
+		{
+			RelOptInfo *inner = root->simple_rel_array[relids[i]];
+			List *restrictlist;
+
+			/* A sanity check: the relations have the same Oid. */
+			Assert(root->simple_rte_array[relids[i]]->relid
+					== root->simple_rte_array[relids[o]]->relid);
+
+			/*
+			 * This optimization applies to inner joins only, so skip any relations
+			 * that form a special join.
+			 */
+			if (bms_is_member(relids[i], scratch->special_join_rels[relids[o]]))
+				continue;
+
+			/* Reuse joinrelids bitset to avoid reallocation. */
+			joinrelids = bms_del_members(joinrelids, joinrelids);
+
+			/*
+			 * We only deal with base rels here, so their relids bitset
+			 * contains only one member -- their relid.
+			 */
+			joinrelids = bms_add_member(joinrelids, relids[o]);
+			joinrelids = bms_add_member(joinrelids, relids[i]);
+
+			/* Is it a unique self join? */
+			restrictlist = build_joinrel_restrictlist(root, joinrelids, outer,
+													  inner);
+			if (!is_unique_self_join(root, joinrelids, outer, inner,
+										   restrictlist))
+				continue;
+
+			/*
+			 * We can remove either relation, so remove the outer one,
+			 * to simplify this loop.
+			 */
+			remove_self_join_rel(root, joinrelids, restrictlist, inner, outer);
+			result = lappend_int(result, relids[o]);
+
+			/*
+			 * Replace varno in root targetlist.
+			 */
+			foreach(lc, scratch->targetlist)
+				change_varno(lfirst(lc), relids[o], relids[i]);
+
+			/* We removed the outer relation, try the next one. */
+			break;
+		}
+	}
+
+	scratch->joinrelids = joinrelids;
+	return result;
+}
+
+/*
+ * A qsort comparator to sort the relids by the relation Oid.
+ */
+static int
+compare_rte(const Index *left, const Index *right, PlannerInfo *root)
+{
+	return root->simple_rte_array[*left]->relid
+		< root->simple_rte_array[*right]->relid;
+}
+
+/*
+ * Find and remove unique self joins on a particular level of the join tree.
+ *
+ * We sort the relations by Oid and then examine each group with the same Oid.
+ * If we removed any relation, remove it from joinlist as well.
+ */
+static void
+remove_self_joins_one_level(PlannerInfo *root, List **joinlist, UsjScratch *scratch)
+{
+	ListCell *lc;
+	List *relidsToRemove = NIL;
+	Oid groupOid;
+	int groupStart;
+	int i;
+	int n = 0;
+	Index *relid_ascending = scratch->relids;
+
+	/*
+	 * Collect the ids of base relations at this level of the join tree.
+	 */
+	foreach (lc, *joinlist)
+	{
+		RangeTblRef *ref = (RangeTblRef *) lfirst(lc);
+		if (!IsA(ref, RangeTblRef))
+			continue;
+
+		/*
+		 * We only care about base relations from which we select something.
+		 */
+		if (root->simple_rte_array[ref->rtindex]->rtekind == RTE_RELATION
+			&& root->simple_rte_array[ref->rtindex]->relkind == RELKIND_RELATION
+			&& root->simple_rel_array[ref->rtindex] != NULL)
+		{
+			relid_ascending[n++] = ref->rtindex;
+		}
+
+		/*
+		 * Limit the number of joins we process to control the quadratic behavior.
+		 */
+		if (n > join_collapse_limit)
+			break;
+	}
+
+	if (n < 2)
+		return;
+
+	/*
+	 * Find and process the groups of relations that have same Oid.
+	 */
+	qsort_arg(relid_ascending, n, sizeof(*relid_ascending),
+			  (qsort_arg_comparator) compare_rte, root);
+	groupOid = root->simple_rte_array[relid_ascending[0]]->relid;
+	groupStart = 0;
+	for (i = 1; i < n; i++)
+	{
+		RangeTblEntry *rte = root->simple_rte_array[relid_ascending[i]];
+		Assert(rte->relid != InvalidOid);
+		if (rte->relid != groupOid)
+		{
+			relidsToRemove = list_concat(relidsToRemove,
+				remove_self_joins_one_group(root, &relid_ascending[groupStart],
+					i - groupStart, scratch));
+			groupOid = rte->relid;
+			groupStart = i;
+		}
+	}
+	Assert(groupOid != InvalidOid);
+	Assert(groupStart < n);
+	relidsToRemove = list_concat(relidsToRemove,
+		remove_self_joins_one_group(root, &relid_ascending[groupStart],
+			n - groupStart, scratch));
+
+	/*
+	 * Delete the removed relations from joinlist.
+	 */
+	foreach(lc, relidsToRemove)
+	{
+		Index indexToRemove = lfirst_int(lc);
+		ListCell *prev = NULL, *next = NULL;
+		ListCell *lc2 = list_head(*joinlist);
+		while (lc2)
+		{
+			next = lnext(lc2);
+			if (castNode(RangeTblRef, lfirst(lc2))->rtindex == indexToRemove)
+				*joinlist = list_delete_cell(*joinlist, lc2, prev);
+			else
+				prev = lc2;
+			lc2 = next;
+		}
+	}
+
+	return;
+}
+
+/*
+ * Find and remove unique self joins on a single level of a join tree, and
+ * recurse to handle deeper levels.
+ */
+static void
+remove_self_joins_recurse(PlannerInfo *root, List **joinlist, UsjScratch *scratch)
+{
+	ListCell *lc;
+	foreach (lc, *joinlist)
+	{
+		switch (((Node*) lfirst(lc))->type)
+		{
+			case T_List:
+				remove_self_joins_recurse(root, (List **) &lfirst(lc), scratch);
+				break;
+			case T_RangeTblRef:
+				break;
+			default:
+				Assert(false);
+		}
+	}
+	remove_self_joins_one_level(root, joinlist, scratch);
+}
+
+/*
+ * Find out which relations have special joins to which.
+ */
+static void
+find_special_joins(PlannerInfo *root, Relids *special_join_rels)
+{
+	ListCell *lc;
+	foreach(lc, root->join_info_list)
+	{
+		SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
+		int bit = -1;
+		while ((bit = bms_next_member(info->min_lefthand, bit)) >= 0)
+		{
+			RelOptInfo *rel = find_base_rel(root, bit);
+			special_join_rels[rel->relid] =
+				bms_add_members(special_join_rels[rel->relid], info->min_righthand);
+		}
+
+		bit = -1;
+		while ((bit = bms_next_member(info->min_righthand, bit)) >= 0)
+		{
+			RelOptInfo *rel = find_base_rel(root, bit);
+			special_join_rels[rel->relid] =
+				bms_add_members(special_join_rels[rel->relid], info->min_lefthand);
+		}
+	}
+}
+
+/*
+ * Find and remove unique self joins in the entire join tree.
+ *
+ * We try to find joins where the same physical relation is joined to
+ * itself on all the columns of a unique index. Such joins can be
+ * can be replaced with a scan using the combined filters.
+ *
+ * When we wind such a join, we mark one of the participating relation as
+ * dead, and rewrite all references to it to point to the remaining
+ * relation.
+ *
+ * 'targetlist' is the top-level targetlist of query. We fix any references
+ * it has to the relations we remove.
+ */
+void
+remove_useless_self_joins(PlannerInfo *root, List **joinlist, List *targetlist)
+{
+	UsjScratch scratch;
+
+	scratch.relids = palloc(root->simple_rel_array_size * sizeof(Index));
+	scratch.special_join_rels = palloc0(root->simple_rel_array_size * sizeof(Relids));
+	scratch.joinrelids = NULL;
+	scratch.targetlist = targetlist;
+
+	find_special_joins(root, scratch.special_join_rels);
+	remove_self_joins_recurse(root, joinlist, &scratch);
 }
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index b05adc7..49c5f61 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -196,7 +196,7 @@ query_planner(PlannerInfo *root, List *tlist,
 	 * jointree preprocessing, but the necessary information isn't available
 	 * until we've built baserel data structures and classified qual clauses.
 	 */
-	joinlist = remove_useless_joins(root, joinlist);
+	joinlist = remove_useless_left_joins(root, joinlist);
 
 	/*
 	 * Also, reduce any semijoins with unique inner rels to plain inner joins.
@@ -205,6 +205,11 @@ query_planner(PlannerInfo *root, List *tlist,
 	reduce_unique_semijoins(root);
 
 	/*
+	 * Remove self joins on a unique column.
+	 */
+	remove_useless_self_joins(root, &joinlist, tlist);
+
+	/*
 	 * Now distribute "placeholders" to base rels as needed.  This has to be
 	 * done after join removal because removal could change whether a
 	 * placeholder is evaluable at a base rel.
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index dbf9adc..cb33509 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1585,7 +1585,8 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
 		relation_has_unique_index_for(root, rel, NIL,
 									  sjinfo->semi_rhs_exprs,
-									  sjinfo->semi_operators))
+									  sjinfo->semi_operators,
+									  NULL /*index_info*/))
 	{
 		pathnode->umethod = UNIQUE_PATH_NOOP;
 		pathnode->path.rows = rel->rows;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index c69740e..dc10ac3 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -38,14 +38,10 @@ typedef struct JoinHashEntry
 
 static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
 					RelOptInfo *input_rel);
-static List *build_joinrel_restrictlist(PlannerInfo *root,
-						   RelOptInfo *joinrel,
-						   RelOptInfo *outer_rel,
-						   RelOptInfo *inner_rel);
 static void build_joinrel_joinlist(RelOptInfo *joinrel,
 					   RelOptInfo *outer_rel,
 					   RelOptInfo *inner_rel);
-static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
+static List *subbuild_joinrel_restrictlist(Relids joinrelids,
 							  List *joininfo_list,
 							  List *new_restrictlist);
 static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel,
@@ -542,7 +538,7 @@ build_join_rel(PlannerInfo *root,
 		 */
 		if (restrictlist_ptr)
 			*restrictlist_ptr = build_joinrel_restrictlist(root,
-														   joinrel,
+														   joinrel->relids,
 														   outer_rel,
 														   inner_rel);
 		return joinrel;
@@ -644,7 +640,7 @@ build_join_rel(PlannerInfo *root,
 	 * caller might or might not need the restrictlist, but I need it anyway
 	 * for set_joinrel_size_estimates().)
 	 */
-	restrictlist = build_joinrel_restrictlist(root, joinrel,
+	restrictlist = build_joinrel_restrictlist(root, joinrel->relids,
 											  outer_rel, inner_rel);
 	if (restrictlist_ptr)
 		*restrictlist_ptr = restrictlist;
@@ -1001,7 +997,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
  *	  the various joinlist entries ultimately refer to RestrictInfos
  *	  pushed into them by distribute_restrictinfo_to_rels().
  *
- * 'joinrel' is a join relation node
+ * 'joinrelids' is a join relation id set
  * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined
  *		to form joinrel.
  *
@@ -1014,9 +1010,9 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
  * RestrictInfo nodes are no longer context-dependent.  Instead, just include
  * the original nodes in the lists made for the join relation.
  */
-static List *
+List *
 build_joinrel_restrictlist(PlannerInfo *root,
-						   RelOptInfo *joinrel,
+						   Relids joinrelids,
 						   RelOptInfo *outer_rel,
 						   RelOptInfo *inner_rel)
 {
@@ -1027,8 +1023,8 @@ build_joinrel_restrictlist(PlannerInfo *root,
 	 * eliminating any duplicates (important since we will see many of the
 	 * same clauses arriving from both input relations).
 	 */
-	result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL);
-	result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result);
+	result = subbuild_joinrel_restrictlist(joinrelids, outer_rel->joininfo, NIL);
+	result = subbuild_joinrel_restrictlist(joinrelids, inner_rel->joininfo, result);
 
 	/*
 	 * Add on any clauses derived from EquivalenceClasses.  These cannot be
@@ -1037,7 +1033,7 @@ build_joinrel_restrictlist(PlannerInfo *root,
 	 */
 	result = list_concat(result,
 						 generate_join_implied_equalities(root,
-														  joinrel->relids,
+														  joinrelids,
 														  outer_rel->relids,
 														  inner_rel));
 
@@ -1063,7 +1059,7 @@ build_joinrel_joinlist(RelOptInfo *joinrel,
 }
 
 static List *
-subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
+subbuild_joinrel_restrictlist(Relids joinrelids,
 							  List *joininfo_list,
 							  List *new_restrictlist)
 {
@@ -1073,7 +1069,7 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
 	{
 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
 
-		if (bms_is_subset(rinfo->required_relids, joinrel->relids))
+		if (bms_is_subset(rinfo->required_relids, joinrelids))
 		{
 			/*
 			 * This clause becomes a restriction clause for the joinrel, since
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 7cae3fc..6bc8bb0 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -515,8 +515,10 @@ typedef struct PartitionSchemeData *PartitionScheme;
  * populate these fields, for base rels; but someday they might be used for
  * join rels too:
  *
- *		unique_for_rels - list of Relid sets, each one being a set of other
- *					rels for which this one has been proven unique
+ *		unique_for_rels - list of (Relids, UniqueIndexInfo*) lists, where Relids
+ * 					is a set of other rels for which this one has been proven
+ *					unique, and UniqueIndexInfo* stores information about the
+ * 					index that makes it unique,	if any.
  *		non_unique_for_rels - list of Relid sets, each one being a set of
  *					other rels for which we have tried and failed to prove
  *					this one unique
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 4ba358e..d78923a 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -272,6 +272,11 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
 			   RelOptInfo *inner_rel,
 			   SpecialJoinInfo *sjinfo,
 			   List **restrictlist_ptr);
+
+extern List *build_joinrel_restrictlist(PlannerInfo *root,
+						   Relids joinrelids,
+						   RelOptInfo *outer_rel,
+						   RelOptInfo *inner_rel);
 extern Relids min_join_parameterization(PlannerInfo *root,
 						  Relids joinrelids,
 						  RelOptInfo *outer_rel,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde30..3c74e13 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -71,9 +71,21 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
  *	  routines to generate index paths
  */
 extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
+
+/*
+ * UniqueIndexInfo describes a unique index and its corresponding clauses
+ * that guarantee the uniqueness of a relation.
+ */
+typedef struct UniqueIndexInfo
+{
+	IndexOptInfo *index;
+	List *clauses;
+} UniqueIndexInfo;
+
 extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
 							  List *restrictlist,
-							  List *exprlist, List *oprlist);
+							  List *exprlist, List *oprlist,
+							  UniqueIndexInfo **info);
 extern bool indexcol_is_bool_constant_for_query(IndexOptInfo *index,
 									int indexcol);
 extern bool match_index_to_operand(Node *operand, int indexcol,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index c8ab028..701aeab 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -103,13 +103,19 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
 /*
  * prototypes for plan/analyzejoins.c
  */
-extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern List *remove_useless_left_joins(PlannerInfo *root, List *joinlist);
 extern void reduce_unique_semijoins(PlannerInfo *root);
 extern bool query_supports_distinctness(Query *query);
 extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
+
+typedef struct UniqueIndexInfo UniqueIndexInfo;
 extern bool innerrel_is_unique(PlannerInfo *root,
 				   Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel,
-				   JoinType jointype, List *restrictlist, bool force_cache);
+				   JoinType jointype, List *restrictlist, bool force_cache,
+				   UniqueIndexInfo **index_info);
+
+extern void remove_useless_self_joins(PlannerInfo *root, List **jointree,
+									  List *tlist);
 
 /*
  * prototypes for plan/setrefs.c
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index dc6262b..37944a0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4307,11 +4307,13 @@ explain (costs off)
 select p.* from
   (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k
   where p.k = 1 and p.k = 2;
-        QUERY PLAN        
---------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  Result
    One-Time Filter: false
-(2 rows)
+   ->  Index Scan using parent_pkey on parent x
+         Index Cond: (k = 1)
+(4 rows)
 
 -- bug 5255: this is not optimizable by join removal
 begin;
@@ -4428,6 +4430,67 @@ select * from
 (0 rows)
 
 --
+-- test that semi- or inner self-joins on a unique column are removed
+--
+create table sj (a int unique, b int);
+insert into sj values (1, null), (null, 2), (2, 1);
+analyze sj;
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ a | b 
+---+---
+ 2 | 1
+(1 row)
+
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Seq Scan on sj q
+   Filter: ((a IS NOT NULL) AND (b = (a - 1)))
+(2 rows)
+
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+				where q.a = p.a and q.b < 10);
+                QUERY PLAN                
+------------------------------------------
+ Seq Scan on sj q
+   Filter: ((a IS NOT NULL) AND (b < 10))
+(2 rows)
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Seq Scan on sj t2
+   Filter: (a IS NOT NULL)
+   SubPlan 1
+     ->  Result
+           One-Time Filter: (t2.a = t2.a)
+           ->  Seq Scan on sj
+                 Filter: (a = t2.a)
+(7 rows)
+
+-- equality to different constants
+create unique index sj_ab on sj(a, b);
+explain (costs off)
+select * from sj t1, sj t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+          QUERY PLAN          
+------------------------------
+ Nested Loop
+   Join Filter: (t1.a = t2.a)
+   ->  Seq Scan on sj t1
+         Filter: (b = 1)
+   ->  Seq Scan on sj t2
+         Filter: (b = 2)
+(6 rows)
+
+--
 -- Test hints given on incorrect column references are useful
 --
 select t1.uunique1 from
@@ -5871,6 +5934,8 @@ left join j2 on j1.id1 = j2.id1 where j1.id2 = 1;
          Output: j2.id1, j2.id2
 (8 rows)
 
+-- !!!FIXME this test doesn't break if I set skip_mark_restore to true.
+-- Also should avoid unique self join removal by using two different relations.
 -- validate logic in merge joins which skips mark and restore.
 -- it should only do this if all quals which were used to detect the unique
 -- are present as join quals, and not plain quals.
@@ -5882,14 +5947,11 @@ create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1;
 explain (costs off) select * from j1 j1
 inner join j1 j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
-                 QUERY PLAN                 
---------------------------------------------
- Merge Join
-   Merge Cond: (j1.id1 = j2.id1)
-   Join Filter: (j1.id2 = j2.id2)
-   ->  Index Scan using j1_id1_idx on j1
-   ->  Index Scan using j1_id1_idx on j1 j2
-(5 rows)
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
+ Seq Scan on j1 j2
+   Filter: ((id1 IS NOT NULL) AND (id2 IS NOT NULL) AND ((id1 % 1000) = 1))
+(2 rows)
 
 select * from j1 j1
 inner join j1 j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index d3ba2a1..6f18c73 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1528,6 +1528,37 @@ select * from
   int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
 
 --
+-- test that semi- or inner self-joins on a unique column are removed
+--
+
+create table sj (a int unique, b int);
+insert into sj values (1, null), (null, 2), (2, 1);
+analyze sj;
+
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+				where q.a = p.a and q.b < 10);
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+
+-- equality to different constants
+create unique index sj_ab on sj(a, b);
+
+explain (costs off)
+select * from sj t1, sj t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+
+--
 -- Test hints given on incorrect column references are useful
 --
 
@@ -1968,6 +1999,9 @@ explain (verbose, costs off)
 select * from j1
 left join j2 on j1.id1 = j2.id1 where j1.id2 = 1;
 
+-- !!!FIXME this test doesn't break if I set skip_mark_restore to true.
+-- Also should avoid unique self join removal by using two different relations.
+
 -- validate logic in merge joins which skips mark and restore.
 -- it should only do this if all quals which were used to detect the unique
 -- are present as join quals, and not plain quals.
-- 
2.7.4

Re: Removing unneeded self joins

Reply via email to