From 60c6662b6782d5d4ad4bba0c57fd5b9fecee7364 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= <yizhi.fzh@alibaba-inc.com>
Date: Mon, 16 Mar 2020 00:48:13 +0800
Subject: [PATCH v4] Bypass distinctClause & groupClause if the exprs are
 already unique

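When every expression in a query's distinctClause or groupClause is already
known to be unique for the input relation, the Unique / HashAggregate step
can be skipped.  To detect this, each RelOptInfo now carries a list of
"uniquekeys" (each one a List of expressions): populate_baserel_uniquekeys()
derives them from unique indexes whose columns are all NOT NULL (or, when a
unique index is fully matched by Const quals, from every target expression,
since at most one row can be returned), propagate_unique_keys_to_joinrel()
carries them across joins, and create_distinct_paths() /
create_grouping_paths() consult them via relation_has_uniquekeys_for().

For example, with the tables added to select_distinct.sql:

    explain (costs off) select distinct * from select_distinct_a;
              QUERY PLAN
    -------------------------------
     Seq Scan on select_distinct_a
    (1 row)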
---
 src/backend/nodes/list.c                      |  27 +
 src/backend/optimizer/path/Makefile           |   3 +-
 src/backend/optimizer/path/allpaths.c         |   7 +-
 src/backend/optimizer/path/joinrels.c         |   2 +
 src/backend/optimizer/path/uniquekeys.c       | 473 ++++++++++++++++++
 src/backend/optimizer/plan/initsplan.c        |   9 +-
 src/backend/optimizer/plan/planner.c          |  43 ++
 src/backend/optimizer/util/plancat.c          |   8 +
 src/include/nodes/pathnodes.h                 |  27 +
 src/include/nodes/pg_list.h                   |   2 +
 src/include/optimizer/paths.h                 |  17 +
 src/test/regress/expected/aggregates.out      |  38 +-
 src/test/regress/expected/join.out            |  26 +-
 src/test/regress/expected/select_distinct.out | 276 ++++++++++
 src/test/regress/sql/select_distinct.sql      |  84 ++++
 15 files changed, 1003 insertions(+), 39 deletions(-)
 create mode 100644 src/backend/optimizer/path/uniquekeys.c

diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index bd0c58cd81..a54c8e66bb 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -688,6 +688,33 @@ list_member_oid(const List *list, Oid datum)
 	return false;
 }
 
+/*
+ * Return true iff there is an equal member in target for every
+ * member in members.
+ */
+bool
+list_all_members_in(const List *members, const List *target)
+{
+	const ListCell	*lc1, *lc2;
+	if (target == NIL && members != NIL)
+		return false;
+	foreach(lc1, members)
+	{
+		bool found = false;
+		foreach(lc2, target)
+		{
+			if (equal(lfirst(lc1), lfirst(lc2)))
+			{
+				found = true;
+				break;
+			}
+		}
+		if (!found)
+			return false;
+	}
+	return true;
+}
+
 /*
  * Delete the n'th cell (counting from 0) in list.
  *
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 1e199ff66f..7b9820c25f 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -21,6 +21,7 @@ OBJS = \
 	joinpath.o \
 	joinrels.o \
 	pathkeys.o \
-	tidpath.o
+	tidpath.o \
+	uniquekeys.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 8286d9cf34..2c65d3715b 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -158,6 +158,7 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	/*
 	 * Construct the all_baserels Relids set.
 	 */
+
 	root->all_baserels = NULL;
 	for (rti = 1; rti < root->simple_rel_array_size; rti++)
 	{
@@ -578,7 +579,9 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	 * first since partial unique indexes can affect size estimates.
 	 */
 	check_index_predicates(root, rel);
-
+
+	populate_baserel_uniquekeys(root, rel);
+
 	/* Mark rel with estimated output rows, width, etc */
 	set_baserel_size_estimates(root, rel);
 }
@@ -2321,6 +2324,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		return;
 	}
 
+	rel->uniquekeys = sub_final_rel->uniquekeys;
+
 	/*
 	 * Mark rel with estimated output rows, width, etc.  Note that we have to
 	 * do this before generating outer-query paths, else cost_subqueryscan is
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index a21c295b99..f1243f31e7 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -920,6 +920,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 
 	/* Apply partitionwise join technique, if possible. */
 	try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
+
+	propagate_unique_keys_to_joinrel(root, joinrel, rel1, rel2, restrictlist, sjinfo->jointype);
 }
 
 
diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c
new file mode 100644
index 0000000000..1be2c4f0db
--- /dev/null
+++ b/src/backend/optimizer/path/uniquekeys.c
@@ -0,0 +1,473 @@
+/*-------------------------------------------------------------------------
+ *
+ * uniquekeys.c
+ *	  Utilities for matching and building unique keys
+ *
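+ * A "uniquekey" is represented as a List of expressions for which a
+ * relation cannot return two rows with equal values.  Each RelOptInfo
+ * keeps a list of such uniquekeys; they are derived from unique indexes
+ * on base relations and then propagated through joins, subqueries and
+ * upper relations.
+ *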
+ * Portions Copyright (c) 2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/path/uniquekeys.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+
+/*
+ * Examine the rel's restriction clauses for usable var = const clauses
+ */
+static List*
+get_mergeable_const_restrictlist(RelOptInfo *rel)
+{
+	List	*restrictlist = NIL;
+	ListCell	*lc;
+	foreach(lc, rel->baserestrictinfo)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+
+		/*
+		 * Note: can_join won't be set for a restriction clause, but
+		 * mergeopfamilies will be if it has a mergejoinable operator and
+		 * doesn't contain volatile functions.
+		 */
+		if (restrictinfo->mergeopfamilies == NIL)
+			continue;			/* not mergejoinable */
+
+		/* XXX can't we just check whether the other side is a Const? */
+
+		/*
+		 * The clause certainly doesn't refer to anything but the given rel.
+		 * If either side is pseudoconstant then we can use it.
+		 */
+		if (bms_is_empty(restrictinfo->left_relids))
+		{
+			/* righthand side is inner */
+			restrictinfo->outer_is_left = true;
+		}
+		else if (bms_is_empty(restrictinfo->right_relids))
+		{
+			/* lefthand side is inner */
+			restrictinfo->outer_is_left = false;
+		}
+		else
+			continue;
+
+		/* OK, add to list */
+		restrictlist = lappend(restrictlist, restrictinfo);
+	}
+
+	return restrictlist;
+
+}
+
+
+/*
+ * Return true if every key column of unique_ind is equated to a Const in the restrictlist
+ */
+static bool
+match_index_to_restrictinfo(IndexOptInfo *unique_ind, List *restrictlist)
+{
+	int c = 0;
+
+	if (restrictlist == NIL)
+		return false;
+
+	for (c = 0; c < unique_ind->nkeycolumns; c++)
+	{
+		ListCell	*lc;
+		bool		found = false;
+
+		foreach(lc, restrictlist)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+			Node	   *rexpr;
+
+			/*
+			 * The condition's equality operator must be a member of the
+			 * index opfamily, else it is not asserting the right kind of
+			 * equality behavior for this index.  We check this first
+			 * since it's probably cheaper than match_index_to_operand().
+			 */
+			if (!list_member_oid(rinfo->mergeopfamilies, unique_ind->opfamily[c]))
+				continue;
+
+			/*
+			 * XXX at some point we may need to check collations here too.
+			 * For the moment we assume all collations reduce to the same
+			 * notion of equality.
+			 */
+
+			/* OK, see if the condition operand matches the index key */
+			if (rinfo->outer_is_left)
+				rexpr = get_rightop(rinfo->clause);
+			else
+				rexpr = get_leftop(rinfo->clause);
+
+			if (match_index_to_operand(rexpr, c, unique_ind))
+			{
+				found = true;
+				break;
+			}
+		}
+
+		/* Every key column must be equated to a Const, else no match */
+		if (!found)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * add_uniquekey_from_index
+ * 	Add a uniquekey for the index, but only if all of its key Vars appear in rel->reltarget
+ */
+static void
+add_uniquekey_from_index(RelOptInfo *rel, IndexOptInfo *unique_index)
+{
+	int	c = 0;
+	List	*exprs = NIL;
+
+	/* Only add the uniquekey if every index column appears in rel->reltarget */
+	for(c = 0; c < unique_index->nkeycolumns; c++)
+	{
+		ListCell	*lc;
+		bool	find_in_exprs = false;
+		foreach(lc, rel->reltarget->exprs)
+		{
+			Var *var;
+			/* We never know whether a FuncExpr is nullable, so we only handle Vars for now */
+			if (!IsA(lfirst(lc), Var))
+				continue;
+			var = lfirst_node(Var, lc);
+			if (var->varattno < InvalidAttrNumber)
+				/* System column */
+				continue;
+			/* Must check NOT NULL for a unique index column */
+			if (!bms_is_member(var->varattno, rel->not_null_cols))
+				continue;
+
+			/* To keep the uniquekey short, we only add it if it exists in rel->reltarget->exprs */
+			if (match_index_to_operand((Node *)lfirst(lc), c, unique_index))
+			{
+				find_in_exprs = true;
+				exprs = lappend(exprs, lfirst(lc));
+				break;
+			}
+		}
+		if (!find_in_exprs)
+			return;
+	}
+	rel->uniquekeys = lappend(rel->uniquekeys, exprs);
+}
+
+/*
+ * populate_baserel_uniquekeys
+ *		Populate the uniquekeys list of 'baserel' by looking at the rel's
+ *		unique indexes and baserestrictinfo.
+ */
+void
+populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *baserel)
+{
+	ListCell *lc;
+	List	*restrictlist = get_mergeable_const_restrictlist(baserel);
+	bool	return_one_row = false;
+	List	*matched_uk_indexes = NIL;
+
+	Assert(baserel->rtekind == RTE_RELATION);
+
+	foreach(lc, baserel->indexlist)
+	{
+		IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
+		/*
+		 * If the index is not unique, or not immediately enforced, or if it's
+		 * a partial index that doesn't match the query, it's useless here.
+		 */
+		if (!ind->unique || !ind->immediate ||
+			(ind->indpred != NIL && !ind->predOK))
+			continue;
+
+		if (match_index_to_restrictinfo(ind, restrictlist))
+		{
+			return_one_row = true;
+			break;
+		}
+		matched_uk_indexes = lappend(matched_uk_indexes, ind);
+	}
+
+	if (return_one_row)
+	{
+		foreach(lc, baserel->reltarget->exprs)
+		{
+			/*
+			 * Every column in this relation is unique since only one row can
+			 * be returned.  No need to check whether it is a Var or nullable.
+			 */
+			baserel->uniquekeys = lappend(baserel->uniquekeys,
+										  list_make1(lfirst(lc)));
+		}
+	}
+	else
+	{
+		foreach(lc, matched_uk_indexes)
+			add_uniquekey_from_index(baserel, lfirst_node(IndexOptInfo, lc));
+	}
+}
+
+
+/*
+ * relation_has_uniquekeys_for
+ *		Returns true if we have proofs that 'rel' cannot return multiple rows with
+ *		the same values in each of 'exprs'.  Otherwise returns false.
+ */
+bool
+relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs)
+{
+	ListCell *lc;
+
+	foreach(lc, rel->uniquekeys)
+	{
+		List *unique_exprs = lfirst_node(List, lc);
+		if (unique_exprs == NIL)
+			continue;
+		if (list_all_members_in(unique_exprs, exprs))
+			return true;
+	}
+	return false;
+}
+
+/*
+ * clause_sides_match_join
+ *	  Determine whether a join clause is of the right form to use in this join.
+ *
+ * We already know that the clause is a binary opclause referencing only the
+ * rels in the current join.  The point here is to check whether it has the
+ * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
+ * rather than mixing outer and inner vars on either side.  If it matches,
+ * we set the transient flag outer_is_left to identify which side is which.
+ */
+static inline bool
+clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
+						Relids innerrelids)
+{
+	if (bms_is_subset(rinfo->left_relids, outerrelids) &&
+		bms_is_subset(rinfo->right_relids, innerrelids))
+	{
+		/* lefthand side is outer */
+		rinfo->outer_is_left = true;
+		return true;
+	}
+	else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
+			 bms_is_subset(rinfo->right_relids, outerrelids))
+	{
+		/* righthand side is outer */
+		rinfo->outer_is_left = false;
+		return true;
+	}
+	return false;				/* no good for these input relations */
+}
+
+/*
+ * clauselist_matches_uniquekeys
+ *   Detect the pattern rel1.uk_expr = rel2.normal_expr in clause_list;
+ *   if so, we are sure the UniqueKey of rel2 is still a unique key in the joinrel.
+ */
+static bool
+clauselist_matches_uniquekeys(List *clause_list, List *uniquekey,  bool outer_side)
+{
+	ListCell *lc;
+
+	if (uniquekey == NIL)
+		return false;
+
+	foreach(lc, uniquekey)
+	{
+		Node *expr = (Node *)lfirst(lc);
+		ListCell *lc2;
+		bool matched_expr = false;
+
+		foreach(lc2, clause_list)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *)lfirst(lc2);
+			Node	   *rexpr;
+
+			/*
+			 * The condition's equality operator must be a member of the
+			 * index opfamily, else it is not asserting the right kind of
+			 * equality behavior for this index.  We check this first
+			 * since it's probably cheaper than match_index_to_operand().
+			 */
+			/*
+			 * XXX do we need this?  uk_opfamily is a concept tied to indexes;
+			 * for a UniqueKey it looks like we don't need it.
+			 */
+			/* if (!list_member_oid(rinfo->mergeopfamilies, key->uk_opfamily)) */
+			/* 	continue; */
+
+			/*
+			 * XXX at some point we may need to check collations here too.
+			 * For the moment we assume all collations reduce to the same
+			 * notion of equality.
+			 */
+
+			/* OK, see if the condition operand matches the uniquekey expression */
+			if (rinfo->outer_is_left != outer_side)
+				rexpr = get_rightop(rinfo->clause);
+			else
+				rexpr = get_leftop(rinfo->clause);
+
+			if (IsA(rexpr, RelabelType))
+				rexpr = (Node *)((RelabelType *)rexpr)->arg;
+
+			if (equal(rexpr, expr))
+			{
+				matched_expr = true;
+				break;
+			}
+		}
+
+		if (!matched_expr)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Used to record whether a uniquekey has already been added to the joinrel;
+ * if so, we needn't add any superset of that uniquekey to the joinrel.
+ */
+typedef struct UniqueKeyContextData
+{
+	List	*uniquekey;
+	/* Set to true if the unique key has been added to joinrel->uniquekeys */
+	bool	added_to_joinrel;
+} *UniqueKeyContext;
+
+
+/*
+ * initialize_uniquecontext_for_joinrel
+ * Return a List of UniqueKeyContext for an inputrel, filtering out
+ * all the uniquekeys which cannot possibly be used later.
+ */
+static List *
+initialize_uniquecontext_for_joinrel(RelOptInfo *joinrel, RelOptInfo *inputrel)
+{
+	List	*res = NIL;
+	ListCell *lc;
+	foreach(lc, inputrel->uniquekeys)
+	{
+		UniqueKeyContext context;
+		/* If it isn't present in joinrel->reltarget->exprs, nothing above the join can reference it */
+		if (!list_all_members_in(lfirst_node(List, lc), joinrel->reltarget->exprs))
+			continue;
+		context = palloc(sizeof(struct UniqueKeyContextData));
+		context->uniquekey = lfirst_node(List, lc);
+		context->added_to_joinrel = false;
+		res = lappend(res, context);
+	}
+	return res;
+}
+
+/*
+ * propagate_unique_keys_to_joinrel
+ *		Using 'restrictlist' determine if rel2 can duplicate rows in rel1 and
+ *		vice-versa.  If the relation at the other side of the join cannot
+ *		cause row duplication, then tag the uniquekeys for the relation onto
+ *		'joinrel's uniquekey list.
+ */
+void
+propagate_unique_keys_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
+								 RelOptInfo *rel1, RelOptInfo *rel2,
+								 List *restrictlist, JoinType jointype)
+{
+	ListCell *lc, *lc2;
+	List	*clause_list = NIL;
+	List	*rel1_uniquekey_context;
+	List	*rel2_uniquekey_context;
+
+	/* Care about the left relation only for SEMI/ANTI join */
+	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+	{
+		foreach(lc, rel1->uniquekeys)
+		{
+			List	*uniquekey = lfirst_node(List, lc);
+			if (list_all_members_in(uniquekey, joinrel->reltarget->exprs))
+				joinrel->uniquekeys = lappend(joinrel->uniquekeys, uniquekey);
+		}
+		return;
+	}
+
+	rel1_uniquekey_context = initialize_uniquecontext_for_joinrel(joinrel, rel1);
+	rel2_uniquekey_context = initialize_uniquecontext_for_joinrel(joinrel, rel2);
+
+	if (rel1_uniquekey_context == NIL || rel2_uniquekey_context == NIL)
+		return;
+
+	foreach(lc, restrictlist)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *)lfirst(lc);
+
+
+		if (IS_OUTER_JOIN(jointype) &&
+			RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
+			continue;
+
+		/* Ignore if it's not a mergejoinable clause */
+		if (!restrictinfo->can_join ||
+			restrictinfo->mergeopfamilies == NIL)
+			continue;			/* not mergejoinable */
+
+		/*
+		 * Check if clause has the form "outer op inner" or "inner op outer",
+		 * and if so mark which side is inner.
+		 */
+		if (!clause_sides_match_join(restrictinfo, rel1->relids, rel2->relids))
+			continue;			/* no good for these input relations */
+
+		/* OK, add to list */
+		clause_list = lappend(clause_list, restrictinfo);
+	}
+
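+	/*
+	 * If the join clauses equate a whole uniquekey of rel1 to expressions of
+	 * rel2, a given rel2 row can match at most one rel1 row, so the join
+	 * cannot duplicate rel2's rows and rel2's uniquekeys stay valid for the
+	 * joinrel (and vice versa just below).
+	 */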
+	foreach(lc, rel1_uniquekey_context)
+	{
+		List	*uniquekey = ((UniqueKeyContext)lfirst(lc))->uniquekey;
+		if (clauselist_matches_uniquekeys(clause_list, uniquekey, true))
+		{
+			foreach(lc2, rel2_uniquekey_context)
+			{
+				UniqueKeyContext ctx = (UniqueKeyContext) lfirst(lc2);
+				joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey);
+				ctx->added_to_joinrel = true;
+			}
+			break;
+		}
+	}
+
+	foreach(lc, rel2_uniquekey_context)
+	{
+		List	*uniquekey = ((UniqueKeyContext)lfirst(lc))->uniquekey;
+		if (clauselist_matches_uniquekeys(clause_list, uniquekey, false))
+		{
+			foreach(lc2, rel1_uniquekey_context)
+			{
+				UniqueKeyContext ctx = (UniqueKeyContext) lfirst(lc2);
+				joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey);
+				ctx->added_to_joinrel = true;
+			}
+			break;
+		}
+	}
+
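+	/*
+	 * For the uniquekeys which could not be kept as-is, the concatenation of
+	 * a rel1 uniquekey and a rel2 uniquekey is still treated as unique in the
+	 * joinrel, since a joinrel row is built from one row of each side.
+	 */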
+	foreach(lc, rel1_uniquekey_context)
+	{
+		UniqueKeyContext context1 = (UniqueKeyContext) lfirst(lc);
+		if (context1->added_to_joinrel)
+			continue;
+		foreach(lc2, rel2_uniquekey_context)
+		{
+			UniqueKeyContext context2 = (UniqueKeyContext) lfirst(lc2);
+			List	*uniquekey = NIL;
+			if (context2->added_to_joinrel)
+				continue;
+			uniquekey = list_copy(context1->uniquekey);
+			uniquekey = list_concat(uniquekey, context2->uniquekey);
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys, uniquekey);
+		}
+	}
+}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index e978b491f6..a674e271e7 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -829,7 +829,14 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 		foreach(l, (List *) f->quals)
 		{
 			Node	   *qual = (Node *) lfirst(l);
-
+			ListCell	*lc;
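+
+			/*
+			 * Record the columns this qual proves non-nullable; the
+			 * uniquekey machinery only trusts unique-index columns that
+			 * are known NOT NULL.
+			 */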
+			foreach(lc, find_nonnullable_vars(qual))
+			{
+				Var *var = lfirst_node(Var, lc);
+				RelOptInfo *rel = root->simple_rel_array[var->varno];
+				if (var->varattno > InvalidAttrNumber)
+					rel->not_null_cols = bms_add_member(rel->not_null_cols, var->varattno);
+			}
 			distribute_qual_to_rels(root, qual,
 									false, below_outer_join, JOIN_INNER,
 									root->qual_security_level,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d6f2153593..e300dad442 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2389,6 +2389,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		add_path(final_rel, path);
 	}
 
+	/* Copy the uniquekeys to final_rel */
+	final_rel->uniquekeys = current_rel->uniquekeys;
+
 	/*
 	 * Generate partial paths for final_rel, too, if outer query levels might
 	 * be able to make use of them.
@@ -3813,6 +3816,20 @@ create_grouping_paths(PlannerInfo *root,
 	Query	   *parse = root->parse;
 	RelOptInfo *grouped_rel;
 	RelOptInfo *partially_grouped_rel;
+	ListCell	*lc;
+
+	List	*required_unique_keys = get_sortgrouplist_exprs(parse->groupClause,
+															parse->targetList);
+	/*
+	 * If the GROUP BY expressions are already unique, the grouping node is
+	 * unnecessary as long as there are no aggregates, window functions or HAVING.
+	 */
+	if (required_unique_keys != NIL &&
+		!parse->hasAggs &&
+		!parse->hasWindowFuncs &&
+		parse->havingQual == NULL &&
+		relation_has_uniquekeys_for(root, input_rel, required_unique_keys))
+		return input_rel;
 
 	/*
 	 * Create grouping relation to hold fully aggregated grouping and/or
@@ -3901,6 +3918,19 @@ create_grouping_paths(PlannerInfo *root,
 	}
 
 	set_cheapest(grouped_rel);
+
+	/* Copy the input rel's uniquekeys to grouped_rel, and add the one
+	 * implied by the GROUP BY clause.
+	 */
+	foreach(lc, input_rel->uniquekeys)
+	{
+		List	*uniquekey = lfirst_node(List, lc);
+		if (list_all_members_in(uniquekey, grouped_rel->reltarget->exprs))
+			grouped_rel->uniquekeys = lappend(grouped_rel->uniquekeys, uniquekey);
+	}
+	if (required_unique_keys != NIL)
+		grouped_rel->uniquekeys = lappend(grouped_rel->uniquekeys,
+										  required_unique_keys);
 	return grouped_rel;
 }
 
@@ -4736,6 +4766,12 @@ create_distinct_paths(PlannerInfo *root,
 	bool		allow_hash;
 	Path	   *path;
 	ListCell   *lc;
+	List	   *required_unique_keys =  get_sortgrouplist_exprs(parse->distinctClause,
+																parse->targetList);
+
+	/* If the result is already unique, just return the input_rel directly */
+	if (relation_has_uniquekeys_for(root, input_rel, required_unique_keys))
+		return input_rel;
 
 	/* For now, do all work in the (DISTINCT, NULL) upperrel */
 	distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
@@ -4920,6 +4956,10 @@ create_distinct_paths(PlannerInfo *root,
 	/* Now choose the best path(s) */
 	set_cheapest(distinct_rel);
 
+	/* All the uniquekeys of the input rel remain valid; add required_unique_keys as one more */
+	distinct_rel->uniquekeys = list_copy(input_rel->uniquekeys);
+	distinct_rel->uniquekeys = lappend(distinct_rel->uniquekeys,
+											required_unique_keys);
 	return distinct_rel;
 }
 
@@ -5066,6 +5106,9 @@ create_ordered_paths(PlannerInfo *root,
 	 * need us to do it.
 	 */
 	Assert(ordered_rel->pathlist != NIL);
+
+	/* Copy the unique keys */
+	ordered_rel->uniquekeys = input_rel->uniquekeys;
 
 	return ordered_rel;
 }
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index d82fc5ab8b..34d30b181c 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -117,6 +117,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	Relation	relation;
 	bool		hasindex;
 	List	   *indexinfos = NIL;
+	int			i;
 
 	/*
 	 * We need not lock the relation since it was already locked, either by
@@ -460,6 +461,13 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 		set_relation_partition_info(root, rel, relation);
 
+	Assert(rel->not_null_cols == NULL);
+	for (i = 0; i < relation->rd_att->natts; i++)
+	{
+		if (relation->rd_att->attrs[i].attnotnull)
+			rel->not_null_cols = bms_add_member(rel->not_null_cols, i+1);
+	}
+
 	table_close(relation, NoLock);
 
 	/*
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 3d3be197e0..ee26cfdb5c 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -687,6 +687,7 @@ typedef struct RelOptInfo
 	PlannerInfo *subroot;		/* if subquery */
 	List	   *subplan_params; /* if subquery */
 	int			rel_parallel_workers;	/* wanted number of parallel workers */
+	Relids		not_null_cols; /* columns of this relation declared or proved NOT NULL, numbered from 1 */
 
 	/* Information about foreign tables and foreign joins */
 	Oid			serverid;		/* identifies server for the table or join */
@@ -706,6 +707,7 @@ typedef struct RelOptInfo
 	QualCost	baserestrictcost;	/* cost of evaluating the above */
 	Index		baserestrict_min_security;	/* min security_level found in
 											 * baserestrictinfo */
+	List	   *uniquekeys;		/* List of unique-key expression Lists */
 	List	   *joininfo;		/* RestrictInfo structures for join clauses
 								 * involving this rel */
 	bool		has_eclass_joins;	/* T means joininfo is incomplete */
@@ -1016,6 +1018,31 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/* UniqueKeySet
+ *
+ * Represents a set of unique keys
+ */
+typedef struct UniqueKeySet
+{
+	NodeTag		type;
+
+	Bitmapset *non_null_keys;	/* indexes of 'keys' proved non-null */
+	List		*keys;	/* list of UniqueKeys */
+} UniqueKeySet;
+
+/*
+ * UniqueKey
+ *
+ * Represents the unique properties held by a RelOptInfo or a Path
+ */
+typedef struct UniqueKey
+{
+	NodeTag		type;
+
+	Oid			uk_collation;	/* collation, if datatypes are collatable */
+	Oid			uk_opfamily;	/* btree opfamily defining the ordering */
+	Expr	   *uk_expr;		/* unique key expression */
+} UniqueKey;
 
 /*
  * PathTarget
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 14ea2766ad..5dfb93895c 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -528,6 +528,8 @@ extern bool list_member_ptr(const List *list, const void *datum);
 extern bool list_member_int(const List *list, int datum);
 extern bool list_member_oid(const List *list, Oid datum);
 
+extern bool list_all_members_in(const List *members, const List *target);
+
 extern List *list_delete(List *list, void *datum);
 extern List *list_delete_ptr(List *list, void *datum);
 extern List *list_delete_int(List *list, int datum);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9ab73bd20c..f0cc2c4245 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -240,4 +240,21 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 									List *live_childrels);
 
+/*
+ * uniquekeys.c
+ *	  Utilities for matching and building unique keys
+ */
+extern void populate_baserel_uniquekeys(PlannerInfo *root,
+										RelOptInfo *baserel);
+extern bool relation_has_uniquekeys_for(PlannerInfo *root,
+										RelOptInfo *rel,
+										List *exprs);
+
+extern void propagate_unique_keys_to_joinrel(PlannerInfo *root,
+											 RelOptInfo *joinrel,
+											 RelOptInfo *rel1,
+											 RelOptInfo *rel2,
+											 List *restrictlist,
+											 JoinType jointype);
+
 #endif							/* PATHS_H */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f457b5b150..07dab0ed5d 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1092,12 +1092,10 @@ create temp table t2 (x int, y int, z int, primary key (x, y));
 create temp table t3 (a int, b int, c int, primary key(a, b) deferrable);
 -- Non-primary-key columns can be removed from GROUP BY
 explain (costs off) select * from t1 group by a,b,c,d;
-      QUERY PLAN      
-----------------------
- HashAggregate
-   Group Key: a, b
-   ->  Seq Scan on t1
-(3 rows)
+   QUERY PLAN   
+----------------
+ Seq Scan on t1
+(1 row)
 
 -- No removal can happen if the complete PK is not present in GROUP BY
 explain (costs off) select a,c from t1 group by a,c,d;
@@ -1112,16 +1110,14 @@ explain (costs off) select a,c from t1 group by a,c,d;
 explain (costs off) select *
 from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
 group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
-                      QUERY PLAN                      
-------------------------------------------------------
- HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.y
-   ->  Hash Join
-         Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-         ->  Seq Scan on t2
-         ->  Hash
-               ->  Seq Scan on t1
-(7 rows)
+                   QUERY PLAN                   
+------------------------------------------------
+ Hash Join
+   Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
+   ->  Seq Scan on t2
+   ->  Hash
+         ->  Seq Scan on t1
+(5 rows)
 
 -- Test case where t1 can be optimized but not t2
 explain (costs off) select t1.*,t2.x,t2.z
@@ -1161,12 +1157,10 @@ explain (costs off) select * from t1 group by a,b,c,d;
 
 -- Okay to remove columns if we're only querying the parent.
 explain (costs off) select * from only t1 group by a,b,c,d;
-      QUERY PLAN      
-----------------------
- HashAggregate
-   Group Key: a, b
-   ->  Seq Scan on t1
-(3 rows)
+   QUERY PLAN   
+----------------
+ Seq Scan on t1
+(1 row)
 
 create temp table p_t1 (
   a int,
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 761376b007..a5d98cf421 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4417,33 +4417,31 @@ select d.* from d left join (select distinct * from b) s
 explain (costs off)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
   on d.a = s.id;
-                QUERY PLAN                
-------------------------------------------
+             QUERY PLAN             
+------------------------------------
  Merge Right Join
    Merge Cond: (b.id = d.a)
-   ->  Group
-         Group Key: b.id
-         ->  Index Scan using b_pkey on b
+   ->  Index Scan using b_pkey on b
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(8 rows)
+(6 rows)
 
 -- similarly, but keying off a DISTINCT clause
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+           QUERY PLAN            
+---------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  Seq Scan on b
 (9 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index f3696c6d1d..c27e7d4b67 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -244,3 +244,279 @@ SELECT null IS NOT DISTINCT FROM null as "yes";
  t
 (1 row)
 
+create table select_distinct_a(pk1 int, pk2 char(20),  uk1 char(20) not null,  uk2 int, e int, primary key(pk1, pk2));
+create unique index select_distinct_a_uk on select_distinct_a(uk1, uk2);
+create table select_distinct_b(a int, b char(20), pk1 char(20), pk2 int, e int, primary key(pk1, pk2));
+-- distinct erased since (pk1, pk2) is the primary key
+explain (costs off) select distinct * from select_distinct_a;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on select_distinct_a
+(1 row)
+
+-- distinct can't be erased since we require all the unique key columns to be not null
+explain (costs off) select distinct uk1, uk2 from select_distinct_a;
+             QUERY PLAN              
+-------------------------------------
+ HashAggregate
+   Group Key: uk1, uk2
+   ->  Seq Scan on select_distinct_a
+(3 rows)
+
+-- distinct erased since uk + not null
+explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 is not null;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on select_distinct_a
+   Filter: (uk2 IS NOT NULL)
+(2 rows)
+
+explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 > 1;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on select_distinct_a
+   Filter: (uk2 > 1)
+(2 rows)
+
+-- distinct erased due to group by
+explain (costs off) select distinct e from select_distinct_a group by e;
+             QUERY PLAN              
+-------------------------------------
+ HashAggregate
+   Group Key: e
+   ->  Seq Scan on select_distinct_a
+(3 rows)
+
+-- distinct erased due to the restrictinfo
+explain (costs off) select distinct uk1 from select_distinct_a where pk1 = 1 and pk2 = 'c';
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Index Scan using select_distinct_a_pkey on select_distinct_a
+   Index Cond: ((pk1 = 1) AND (pk2 = 'c'::bpchar))
+(2 rows)
+
+-- test join
+set enable_mergejoin to off;
+set enable_hashjoin to off;
+insert into select_distinct_a values(1, 'a', 'a', 0, 1), (1, 'b', 'A', 0, 2), (3, 'c', 'c', 0, 3);
+insert into select_distinct_b values(1, 'a', 'a', 0, 1), (4, 'd', 'd', 0, 4), (1, 'e', 'e', 0, 5);
+-- Cartesian join
+explain (costs off) select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null;
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop
+   ->  Seq Scan on select_distinct_b b
+   ->  Materialize
+         ->  Seq Scan on select_distinct_a a
+               Filter: (uk2 IS NOT NULL)
+(5 rows)
+
+select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null order by 1, 2, 3, 4;
+         uk1          | uk2 |         pk1          | pk2 
+----------------------+-----+----------------------+-----
+ a                    |   0 | a                    |   0
+ a                    |   0 | d                    |   0
+ a                    |   0 | e                    |   0
+ A                    |   0 | a                    |   0
+ A                    |   0 | d                    |   0
+ A                    |   0 | e                    |   0
+ c                    |   0 | a                    |   0
+ c                    |   0 | d                    |   0
+ c                    |   0 | e                    |   0
+(9 rows)
+
+-- left join
+explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a);
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop Left Join
+   Join Filter: (a.pk1 = b.a)
+   ->  Seq Scan on select_distinct_a a
+   ->  Materialize
+         ->  Seq Scan on select_distinct_b b
+(5 rows)
+
+select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;
+ pk1 |         pk2          |         pk1          | pk2 
+-----+----------------------+----------------------+-----
+   1 | a                    | a                    |   0
+   1 | a                    | e                    |   0
+   1 | b                    | a                    |   0
+   1 | b                    | e                    |   0
+   3 | c                    |                      |    
+(5 rows)
+
+-- right join
+explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a);
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Nested Loop Left Join
+   ->  Seq Scan on select_distinct_b b
+   ->  Index Only Scan using select_distinct_a_pkey on select_distinct_a a
+         Index Cond: (pk1 = b.a)
+(4 rows)
+
+select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;
+ pk1 |         pk2          |         pk1          | pk2 
+-----+----------------------+----------------------+-----
+   1 | a                    | a                    |   0
+   1 | a                    | e                    |   0
+   1 | b                    | a                    |   0
+   1 | b                    | e                    |   0
+     |                      | d                    |   0
+(5 rows)
+
+-- full join
+explain (costs off)  select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a);
+                 QUERY PLAN                  
+---------------------------------------------
+ Hash Full Join
+   Hash Cond: (a.pk1 = b.a)
+   ->  Seq Scan on select_distinct_a a
+   ->  Hash
+         ->  Seq Scan on select_distinct_b b
+(5 rows)
+
+select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;
+ pk1 |         pk2          |         pk1          | pk2 
+-----+----------------------+----------------------+-----
+   1 | a                    | a                    |   0
+   1 | a                    | e                    |   0
+   1 | b                    | a                    |   0
+   1 | b                    | e                    |   0
+   3 | c                    |                      |    
+     |                      | d                    |   0
+(6 rows)
+
+-- distinct can't be erased since b.pk2 is missing
+explain (costs off) select distinct a.pk1, a.pk2, b.pk1 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: a.pk1, a.pk2, b.pk1
+         ->  Hash Full Join
+               Hash Cond: (a.pk1 = b.a)
+               ->  Seq Scan on select_distinct_a a
+               ->  Hash
+                     ->  Seq Scan on select_distinct_b b
+(8 rows)
+
+-- Semi/anti join
+explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 in (select a from select_distinct_b);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop
+   ->  HashAggregate
+         Group Key: select_distinct_b.a
+         ->  Seq Scan on select_distinct_b
+   ->  Index Only Scan using select_distinct_a_pkey on select_distinct_a
+         Index Cond: (pk1 = select_distinct_b.a)
+(6 rows)
+
+explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 not in (select a from select_distinct_b);
+              QUERY PLAN               
+---------------------------------------
+ Seq Scan on select_distinct_a
+   Filter: (NOT (hashed SubPlan 1))
+   SubPlan 1
+     ->  Seq Scan on select_distinct_b
+(4 rows)
+
+-- we can also handle some limited forms of subquery
+explain (costs off) select distinct * from select_distinct_a a,  (select a from select_distinct_b group by a) b where a.pk1 = b.a;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Nested Loop
+   ->  HashAggregate
+         Group Key: select_distinct_b.a
+         ->  Seq Scan on select_distinct_b
+   ->  Index Scan using select_distinct_a_pkey on select_distinct_a a
+         Index Cond: (pk1 = select_distinct_b.a)
+(6 rows)
+
+select distinct * from select_distinct_a a,  (select a from select_distinct_b group by a) b where a.pk1 = b.a order by 1, 2, 3;
+ pk1 |         pk2          |         uk1          | uk2 | e | a 
+-----+----------------------+----------------------+-----+---+---
+   1 | a                    | a                    |   0 | 1 | 1
+   1 | b                    | A                    |   0 | 2 | 1
+(2 rows)
+
+explain (costs off) select distinct * from select_distinct_a a,  (select distinct a from select_distinct_b) b where a.pk1 = b.a;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Nested Loop
+   ->  HashAggregate
+         Group Key: select_distinct_b.a
+         ->  Seq Scan on select_distinct_b
+   ->  Index Scan using select_distinct_a_pkey on select_distinct_a a
+         Index Cond: (pk1 = select_distinct_b.a)
+(6 rows)
+
+select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a order by 1 ,2, 3;
+ pk1 |         pk2          |         uk1          | uk2 | e | a 
+-----+----------------------+----------------------+-----+---+---
+   1 | a                    | a                    |   0 | 1 | 1
+   1 | b                    | A                    |   0 | 2 | 1
+(2 rows)
+
+-- Distinct On
+-- can't erase since pk2 is missing
+explain (costs off) select distinct on(pk1) pk1, pk2 from select_distinct_a;
+                QUERY PLAN                 
+-------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: pk1
+         ->  Seq Scan on select_distinct_a
+(4 rows)
+
+-- ok to erase
+explain (costs off) select distinct on(pk1, pk2) pk1, pk2 from select_distinct_a;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on select_distinct_a
+(1 row)
+
+-- test a view.
+create view distinct_v1 as select distinct uk1, uk2 from select_distinct_a where uk2 is not null;
+explain (costs off) select * from distinct_v1;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on select_distinct_a
+   Filter: (uk2 IS NOT NULL)
+(2 rows)
+
+alter table select_distinct_a alter column uk1 drop not null;
+explain (costs off) select * from distinct_v1;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ HashAggregate
+   Group Key: select_distinct_a.uk1, select_distinct_a.uk2
+   ->  Seq Scan on select_distinct_a
+         Filter: (uk2 IS NOT NULL)
+(4 rows)
+
+alter table select_distinct_a alter column uk1 set not null;
+-- test generic plan
+prepare pt as select * from distinct_v1;
+explain (costs off)  execute pt;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on select_distinct_a
+   Filter: (uk2 IS NOT NULL)
+(2 rows)
+
+alter table select_distinct_a alter column uk1 drop not null;
+explain (costs off) execute pt;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ HashAggregate
+   Group Key: select_distinct_a.uk1, select_distinct_a.uk2
+   ->  Seq Scan on select_distinct_a
+         Filter: (uk2 IS NOT NULL)
+(4 rows)
+
+drop view distinct_v1;
+drop table select_distinct_a;
+drop table select_distinct_b;
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index a605e86449..282ca58cf9 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -73,3 +73,87 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 SELECT 2 IS NOT DISTINCT FROM null as "no";
 SELECT null IS NOT DISTINCT FROM null as "yes";
+create table select_distinct_a(pk1 int, pk2 char(20),  uk1 char(20) not null,  uk2 int, e int, primary key(pk1, pk2));
+create unique index select_distinct_a_uk on select_distinct_a(uk1, uk2);
+create table select_distinct_b(a int, b char(20), pk1 char(20), pk2 int, e int, primary key(pk1, pk2));
+
+-- distinct erased since (pk1, pk2) is the primary key
+explain (costs off) select distinct * from select_distinct_a;
+
+-- distinct can't be erased since we require all the unique key columns to be not null
+explain (costs off) select distinct uk1, uk2 from select_distinct_a;
+
+-- distinct erased since uk + not null
+explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 is not null;
+explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 > 1;
+
+-- distinct erased due to group by
+explain (costs off) select distinct e from select_distinct_a group by e;
+
+-- distinct erased due to the restrictinfo
+explain (costs off) select distinct uk1 from select_distinct_a where pk1 = 1 and pk2 = 'c';
+
+-- test join
+set enable_mergejoin to off;
+set enable_hashjoin to off;
+
+insert into select_distinct_a values(1, 'a', 'a', 0, 1), (1, 'b', 'A', 0, 2), (3, 'c', 'c', 0, 3);
+insert into select_distinct_b values(1, 'a', 'a', 0, 1), (4, 'd', 'd', 0, 4), (1, 'e', 'e', 0, 5);
+
+-- Cartesian join
+explain (costs off) select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null;
+select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null order by 1, 2, 3, 4;
+
+
+-- left join
+explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a);
+select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;
+
+-- right join
+explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a);
+select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;
+
+-- full join
+explain (costs off)  select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a);
+select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;
+
+-- distinct can't be erased since b.pk2 is missing
+explain (costs off) select distinct a.pk1, a.pk2, b.pk1 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a);
+
+
+-- Semi/anti join
+explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 in (select a from select_distinct_b);
+explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 not in (select a from select_distinct_b);
+
+-- we can also handle some limited forms of subquery
+explain (costs off) select distinct * from select_distinct_a a,  (select a from select_distinct_b group by a) b where a.pk1 = b.a;
+select distinct * from select_distinct_a a,  (select a from select_distinct_b group by a) b where a.pk1 = b.a order by 1, 2, 3;
+
+explain (costs off) select distinct * from select_distinct_a a,  (select distinct a from select_distinct_b) b where a.pk1 = b.a;
+select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a order by 1 ,2, 3;
+
+-- Distinct On
+-- can't erase since pk2 is missing
+explain (costs off) select distinct on(pk1) pk1, pk2 from select_distinct_a;
+-- ok to erase
+explain (costs off) select distinct on(pk1, pk2) pk1, pk2 from select_distinct_a;
+
+
+-- test a view.
+create view distinct_v1 as select distinct uk1, uk2 from select_distinct_a where uk2 is not null;
+explain (costs off) select * from distinct_v1;
+
+alter table select_distinct_a alter column uk1 drop not null;
+explain (costs off) select * from distinct_v1;
+
+alter table select_distinct_a alter column uk1 set not null;
+
+-- test generic plan
+prepare pt as select * from distinct_v1;
+explain (costs off)  execute pt;
+alter table select_distinct_a alter column uk1 drop not null;
+explain (costs off) execute pt;
+
+drop view distinct_v1;
+drop table select_distinct_a;
+drop table select_distinct_b;
-- 
2.21.0

