From 7799b005726b542cffa6e9a19704cabf235b214e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= <yizhi.fzh@alibaba-inc.com>
Date: Wed, 7 Jul 2021 16:02:01 +0800
Subject: [PATCH v1] design uniquekey v2

---
 src/backend/nodes/list.c                      |  16 +
 src/backend/optimizer/path/Makefile           |   3 +-
 src/backend/optimizer/path/allpaths.c         |   7 +-
 src/backend/optimizer/path/uniquekey.c        | 276 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          |   3 +
 src/backend/optimizer/util/plancat.c          |  17 ++
 src/include/nodes/nodes.h                     |   3 +-
 src/include/nodes/pathnodes.h                 |  16 +
 src/include/nodes/pg_list.h                   |   2 +
 src/include/optimizer/paths.h                 |   4 +
 src/include/optimizer/plancat.h               |   2 +
 src/test/regress/expected/join.out            |  11 +-
 src/test/regress/expected/select_distinct.out |  50 ++++
 src/test/regress/sql/select_distinct.sql      |  20 ++
 14 files changed, 420 insertions(+), 10 deletions(-)
 create mode 100644 src/backend/optimizer/path/uniquekey.c

diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index 94fb236daf..3053ce38ea 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -702,6 +702,22 @@ list_member_oid(const List *list, Oid datum)
 	return false;
 }
 
+/*
+ * list_is_subset_ptr
+ *		Report whether every pointer stored in "a" is also a member of "b".
+ */
+bool
+list_is_subset_ptr(const List *a, const List *b)
+{
+	ListCell   *cell;
+
+	foreach(cell, a)
+		if (!list_member_ptr(b, lfirst(cell)))
+			return false;
+	return true;
+}
+
+
 /*
  * Delete the n'th cell (counting from 0) in list.
  *
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 1e199ff66f..63cc1505d9 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -21,6 +21,7 @@ OBJS = \
 	joinpath.o \
 	joinrels.o \
 	pathkeys.o \
-	tidpath.o
+	tidpath.o \
+	uniquekey.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 0dcb9e2337..f5d8a443f9 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -396,6 +396,9 @@ static void
 set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 			 Index rti, RangeTblEntry *rte)
 {
+	/* Set the not-null attributes before the UniqueKeys are populated */
+	set_baserel_notnull_attrs(rel);
+
 	if (rel->reloptkind == RELOPT_BASEREL &&
 		relation_excluded_by_constraints(root, rel, rte))
 	{
@@ -491,7 +494,7 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 		}
 	}
 
-	set_baserel_notnull_attrs(rel);
+
 
 	/*
 	 * We insist that all non-dummy rels have a nonzero rowcount estimate.
@@ -616,6 +619,8 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	 */
 	check_index_predicates(root, rel);
 
+	populate_baserel_uniquekeys(root, rel);
+
 	/* Mark rel with estimated output rows, width, etc */
 	set_baserel_size_estimates(root, rel);
 }
diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c
new file mode 100644
index 0000000000..6111e9fa00
--- /dev/null
+++ b/src/backend/optimizer/path/uniquekey.c
@@ -0,0 +1,276 @@
+/*-------------------------------------------------------------------------
+ *
+ * uniquekey.c
+ *	  Utilities for maintaining uniquekey.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/path/uniquekey.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/pathnodes.h"
+#include "optimizer/paths.h"
+
+
+/*
+ * print_uniquekey
+ *	Review-only dump of rel->uniquekeys via elog(INFO); should be removed before commit.
+ */
+static void
+print_uniquekey(PlannerInfo *root, RelOptInfo *rel)
+{
+	if (false)	/* the dump is compiled in but never executed */
+	{
+		ListCell	*lc;
+		elog(INFO, "Rel = %s", bmsToString(rel->relids));
+		foreach(lc, rel->uniquekeys)
+		{
+			UniqueKey *ukey = lfirst_node(UniqueKey, lc);
+			int i = -1;
+			elog(INFO, "UNIQUEKEY{indexes=%s, multinull=%d}",
+				 bmsToString(ukey->unique_expr_indexes),
+				 ukey->multi_nulls
+				);
+			/* Print each root->unique_exprs entry this key refers to. */
+			while ((i = bms_next_member(ukey->unique_expr_indexes, i)) >= 0)
+			{
+				Node *node = (Node *) list_nth(root->unique_exprs, i);
+				if (IsA(node, SingleRow))
+					elog(INFO,
+						 "Expr(%d) SingleRow{relid = %d}",
+						 i, castNode(SingleRow, node)->relid);
+				else
+					elog(INFO,
+						 "EC(%d), %s", i, nodeToString(node)
+						);
+			}
+		}
+	}
+}
+
+/* make_uniquekey - allocate a UniqueKey node and fill in both of its fields. */
+static UniqueKey *
+make_uniquekey(Bitmapset *unique_expr_indexes, bool multi_null)
+{
+	UniqueKey  *result = makeNode(UniqueKey);
+	result->multi_nulls = multi_null;
+	result->unique_expr_indexes = unique_expr_indexes;	/* stored, not copied */
+	return result;
+}
+
+/* Return the first PathKey of "pathkeys" for whose pk_eclass
+ * find_ec_member_matching_expr() finds a match of expr; NULL if none. */
+static PathKey *
+find_matching_pathkey_expr(Expr *expr,  List *pathkeys, Relids relids)
+{
+	ListCell   *cell;
+
+	foreach(cell, pathkeys)
+		if (find_ec_member_matching_expr(lfirst_node(PathKey, cell)->pk_eclass, expr, relids))
+			return lfirst_node(PathKey, cell);
+	return NULL;
+}
+
+static bool
+add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index,
+							  List *mergeable_const_peer, List *expr_opfamilies)
+{
+	/* Try to add a UniqueKey to the index's rel; returns true only in the single-row case. */
+	List	*unique_ecs = NIL;
+	ListCell	*indexpr_item;
+	int	c = 0;
+	RelOptInfo *rel = unique_index->rel;
+	PathKey	*pathkey;
+	bool	multinull = false;
+
+	indexpr_item = list_head(unique_index->indexprs);
+	for (c = 0; c < unique_index->nkeycolumns; c++)
+	{
+		int attr = unique_index->indexkeys[c];
+		Expr *expr;
+		bool	matched_const = false;
+		ListCell	*lc1, *lc2;
+		if (attr > 0)
+		{
+			Var *var;
+			expr = list_nth_node(TargetEntry, unique_index->indextlist, c)->expr;
+			var = castNode(Var, expr);
+			Assert(IsA(expr, Var));
+			if (!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
+							  rel->notnull_attrs[0]))
+				multinull = true;	/* column is not recorded as NOT NULL */
+		}
+		else if (attr == 0)
+		{
+			/* Expression column: consume the next entry of indexprs */
+			expr = lfirst(indexpr_item);
+			indexpr_item = lnext(unique_index->indexprs, indexpr_item);
+			/* We can't guarantee that an expression will never return NULL */
+			multinull = true;
+		}
+		else /* attr < 0 */
+		{
+			/* An index on OID is possible, but it is not handled for now. */
+			return false;
+		}
+
+		/*
+		 * Check for "index_col = Const", taking the operator families into
+		 * account.  If it holds, the column is left out of the UniqueKey.
+		 */
+		forboth(lc1, mergeable_const_peer, lc2, expr_opfamilies)
+		{
+			if (list_member_oid((List *) lfirst(lc2), unique_index->opfamily[c]) &&
+				match_index_to_operand((Node *) lfirst(lc1), c, unique_index))
+			{
+				matched_const = true;
+				break;
+			}
+		}
+		/* NOTE(review): multinull may already be set by a column skipped here - confirm intended */
+		if (matched_const)
+			continue;
+
+		/* The column must match one of the distinct_pathkeys, else give up on this index. */
+		pathkey = find_matching_pathkey_expr(expr, root->distinct_pathkeys, rel->relids);
+		if (!pathkey)
+			return false;
+		unique_ecs = lappend(unique_ecs, pathkey->pk_eclass);
+	}
+	/* The new entry is appended to root->unique_exprs below, so its index is the current length. */
+	{
+		Bitmapset *unique_exprs_index = bms_make_singleton(list_length(root->unique_exprs));
+		if (unique_ecs == NIL)	/* no key column was left after constant matching */
+		{
+			SingleRow *singlerow = makeNode(SingleRow);
+			singlerow->relid = rel->relid;
+			rel->uniquekeys = list_make1(make_uniquekey(unique_exprs_index, false /* multi-null */));
+			root->unique_exprs = lappend(root->unique_exprs, singlerow);
+			return true;	/* replaces any keys collected so far for rel */
+		}
+		else
+		{
+			UniqueKey *ukey = make_uniquekey(unique_exprs_index, multinull);
+			root->unique_exprs = lappend(root->unique_exprs, unique_ecs);
+			rel->uniquekeys = lappend(rel->uniquekeys, ukey);
+			return false;	/* key added, but not a single-row key */
+		}
+	}
+	return false;	/* not reached: both branches above return */
+}
+
+/* populate_baserel_uniquekeys - fill rel->uniquekeys from rel's unique indexes and its "expr = const" quals. */
+void
+populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *rel)
+{
+	List	   *const_peers = NIL;	/* operand whose other side references no rel */
+	List	   *peer_opfamilies = NIL;	/* mergeopfamilies of the same qual, in step */
+	ListCell   *cell;
+
+	foreach(cell, rel->baserestrictinfo)
+	{
+		RestrictInfo *qual = (RestrictInfo *) lfirst(cell);
+		void	   *peer;
+		if (qual->mergeopfamilies == NIL)
+			continue;
+		if (bms_is_empty(qual->left_relids))
+			peer = get_rightop(qual->clause);
+		else if (bms_is_empty(qual->right_relids))
+			peer = get_leftop(qual->clause);
+		else
+			continue;
+		const_peers = lappend(const_peers, peer);
+		peer_opfamilies = lappend(peer_opfamilies, qual->mergeopfamilies);
+	}
+
+	foreach(cell, rel->indexlist)
+	{
+		IndexOptInfo *index = (IndexOptInfo *) lfirst(cell);
+		/* Stop as soon as one usable unique index reports the single-row case. */
+		if (index->unique && index->immediate &&
+			(index->indpred == NIL || index->predOK) &&
+			add_uniquekey_for_uniqueindex(root, index, const_peers, peer_opfamilies))
+			return;
+	}
+	print_uniquekey(root, rel);
+}
+
+
+/*
+ * uniquekey_contains_in
+ *	  True if each entry referenced by ukey is covered: a SingleRow needs its
+ *	  relid in "relids"; a List of ECs must be a pointer-subset of "lecs".
+ */
+static bool
+uniquekey_contains_in(PlannerInfo *root, UniqueKey *ukey, List *lecs, Relids relids)
+{
+	int			idx = -1;
+	while ((idx = bms_next_member(ukey->unique_expr_indexes, idx)) >= 0)
+	{
+		Node	   *item = list_nth(root->unique_exprs, idx);
+		bool		covered;
+
+		if (IsA(item, SingleRow))	/* any column of that rel is fine */
+			covered = bms_is_member(castNode(SingleRow, item)->relid, relids);
+		else if (IsA(item, List))	/* a List of EquivalenceClass pointers */
+			covered = list_is_subset_ptr((List *) item, lecs);
+		else
+		{
+			Assert(false);		/* should be impossible */
+			covered = false;
+		}
+		if (!covered)
+			return false;
+	}
+	return true;
+}
+
+
+/*
+ * relation_is_distinct_for
+ *	  True if some UniqueKey of rel without multi_nulls is covered by the ECs of distinct_pathkey.
+ */
+bool
+relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *distinct_pathkey)
+{
+	List	   *distinct_ecs = NIL;
+	Relids		distinct_relids = NULL;
+	ListCell   *cell;
+
+	/* Note that ec_relids doesn't include child members, but DISTINCT
+	 * would not operate on a child rel either. */
+	foreach(cell, distinct_pathkey)
+	{
+		EquivalenceClass *ec = ((PathKey *) lfirst(cell))->pk_eclass;
+		distinct_ecs = lappend(distinct_ecs, ec);
+		distinct_relids = bms_union(distinct_relids, ec->ec_relids);
+	}
+
+	foreach(cell, rel->uniquekeys)
+	{
+		UniqueKey  *key = lfirst(cell);
+
+		if (!key->multi_nulls &&
+			uniquekey_contains_in(root, key, distinct_ecs, distinct_relids))
+			return true;
+	}
+	return false;
+}
+
+
+/* Build a UniqueKey over the union of both keys' unique_expr_indexes, with the given multi_null flag. */
+static UniqueKey *
+pg_attribute_unused()			/* not called yet; portable form of __attribute__((unused)) */
+build_composited_uniquekey(UniqueKey *ukey1, UniqueKey *ukey2, bool multi_null)
+{
+	return make_uniquekey(bms_union(ukey1->unique_expr_indexes, ukey2->unique_expr_indexes),
+						  multi_null);
+}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1868c4eff4..b9d4e6395e 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4238,6 +4238,9 @@ create_distinct_paths(PlannerInfo *root,
 	Path	   *path;
 	ListCell   *lc;
 
+	if (relation_is_distinct_for(root, input_rel, root->distinct_pathkeys))
+		return input_rel;
+
 	/* For now, do all work in the (DISTINCT, NULL) upperrel */
 	distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 7d3b40090e..4852d88c52 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2079,6 +2079,23 @@ get_function_rows(PlannerInfo *root, Oid funcid, Node *node)
 	return result;
 }
 
+/* get_unique_indexes - rel's indexes flagged unique and immediate, having no predicate or predOK set. */
+List *
+get_unique_indexes(RelOptInfo *rel)
+{
+	List	   *result = NIL;
+	ListCell   *cell;
+	foreach(cell, rel->indexlist)
+	{
+		IndexOptInfo *info = (IndexOptInfo *) lfirst(cell);
+		if (!info->unique || !info->immediate)
+			continue;
+		if (info->indpred == NIL || info->predOK)
+			result = lappend(result, info);
+	}
+	return result;
+}
+
 /*
  * has_unique_index
  *
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index d9e417bcd7..7a76413ba1 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -280,7 +280,8 @@ typedef enum NodeTag
 	T_RollupData,
 	T_GroupingSetData,
 	T_StatisticExtInfo,
-
+	T_UniqueKey,
+	T_SingleRow,
 	/*
 	 * TAGS FOR MEMORY NODES (memnodes.h)
 	 */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index d6758f21e1..2ee9e0885b 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -246,6 +246,7 @@ struct PlannerInfo
 									 * subquery outputs */
 
 	List	   *eq_classes;		/* list of active EquivalenceClasses */
+	List	   *unique_exprs;		/* SingleRow nodes or Lists of ECs, referenced by index from UniqueKey */
 
 	bool		ec_merging_done;	/* set true once ECs are canonical */
 
@@ -691,6 +692,7 @@ typedef struct RelOptInfo
 								  * the len would always 1. and for others the array
 								  * index is relid from relids.
 								  */
+	List		*uniquekeys; /* A list of UniqueKey. */
 
 	/* materialization information */
 	List	   *pathlist;		/* Path structures */
@@ -1067,6 +1069,20 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/* UniqueKey - a set of unique expressions, given as indexes into PlannerInfo.unique_exprs. */
+typedef struct UniqueKey
+{
+	NodeTag	type;
+	Bitmapset	*unique_expr_indexes;	/* positions in root->unique_exprs */
+	bool	multi_nulls;	/* set when a key column is not known NOT NULL */
+} UniqueKey;
+
+typedef struct SingleRow	/* unique_exprs entry made when no unique-index column is left after constant matching */
+{
+	NodeTag	type;
+	Index		relid;	/* relid of the rel the entry was created for */
+} SingleRow;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 30f98c4595..bbe0209d7e 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -558,6 +558,8 @@ extern bool list_member_ptr(const List *list, const void *datum);
 extern bool list_member_int(const List *list, int datum);
 extern bool list_member_oid(const List *list, Oid datum);
 
+extern bool list_is_subset_ptr(const List *a, const List *b);
+
 extern pg_nodiscard List *list_delete(List *list, void *datum);
 extern pg_nodiscard List *list_delete_ptr(List *list, void *datum);
 extern pg_nodiscard List *list_delete_int(List *list, int datum);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c..07a51d737a 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -255,4 +255,8 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 									List *live_childrels);
 
+extern void populate_baserel_uniquekeys(PlannerInfo *root,
+										RelOptInfo *baserel);
+extern bool relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
+									 List *distinct_pathkey);
 #endif							/* PATHS_H */
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index 8d1d6c1b42..2dd4f525e6 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -41,6 +41,8 @@ extern bool relation_excluded_by_constraints(PlannerInfo *root,
 
 extern List *build_physical_tlist(PlannerInfo *root, RelOptInfo *rel);
 
+extern List *get_unique_indexes(RelOptInfo *rel);
+extern List *get_exprs_from_index(IndexOptInfo *indinfo, bool contains_includekey);
 extern bool has_unique_index(RelOptInfo *rel, AttrNumber attno);
 
 extern Selectivity restriction_selectivity(PlannerInfo *root,
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index fec0325e73..0631e8076d 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4604,18 +4604,15 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
+             QUERY PLAN             
+------------------------------------
  Merge Right Join
    Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+   ->  Index Scan using b_pkey on b
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
-(9 rows)
+(6 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 11c6f50fbf..c5d32cec3a 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -306,3 +306,53 @@ SELECT null IS NOT DISTINCT FROM null as "yes";
  t
 (1 row)
 
+-- uniquekey test
+CREATE TABLE uktest(a int, b int, c int not null, d int, e int, f int, PRIMARY KEY(a, b));
+CREATE UNIQUE INDEX on uktest(c, d);
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uktest;
+     QUERY PLAN     
+--------------------
+ Seq Scan on uktest
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c FROM uktest;
+        QUERY PLAN        
+--------------------------
+ HashAggregate
+   Group Key: c
+   ->  Seq Scan on uktest
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uktest;
+        QUERY PLAN        
+--------------------------
+ HashAggregate
+   Group Key: c, d
+   ->  Seq Scan on uktest
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uktest WHERE d > 10;
+     QUERY PLAN     
+--------------------
+ Seq Scan on uktest
+   Filter: (d > 10)
+(2 rows)
+
+-- UniqueKey expressions reduce due to c = Const.
+EXPLAIN (COSTS OFF) SELECT DISTINCT b FROM uktest where a = 1;
+               QUERY PLAN               
+----------------------------------------
+ Bitmap Heap Scan on uktest
+   Recheck Cond: (a = 1)
+   ->  Bitmap Index Scan on uktest_pkey
+         Index Cond: (a = 1)
+(4 rows)
+
+-- Single row case.
+EXPLAIN (COSTS OFF) SELECT DISTINCT c FROM uktest where a = 1 and b = 1;
+               QUERY PLAN               
+----------------------------------------
+ Index Scan using uktest_pkey on uktest
+   Index Cond: ((a = 1) AND (b = 1))
+(2 rows)
+
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 33102744eb..706bcf37df 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -135,3 +135,23 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 SELECT 2 IS NOT DISTINCT FROM null as "no";
 SELECT null IS NOT DISTINCT FROM null as "yes";
+
+-- uniquekey test
+CREATE TABLE uktest(a int, b int, c int not null, d int, e int, f int, PRIMARY KEY(a, b));
+CREATE UNIQUE INDEX on uktest(c, d);
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uktest;
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c FROM uktest;
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uktest;
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uktest WHERE d > 10;
+
+-- UniqueKey expressions reduce due to c = Const.
+EXPLAIN (COSTS OFF) SELECT DISTINCT b FROM uktest where a = 1;
+
+-- Single row case.
+EXPLAIN (COSTS OFF) SELECT DISTINCT c FROM uktest where a = 1 and b = 1;
+
+
-- 
2.21.0

