[HACKERS] Get more from indices.

Kyotaro HORIGUCHI Thu, 31 Oct 2013 03:44:50 -0700

Hello, This is the last episode of the 'dance with indices'?
series.


Unique indexes can sort the tuples in corresponding tables
prefectly. So this query might can use index.

> uniquetest=# create table t (a int, b int, c int, d text);
> uniquetest=# create unique index i_t_pkey on t(a, b);
> uniquetest=# insert into t
>   (select a % 10, a / 10, a, 't' from generate_series(0, 100000) a);
> uniquetest=# analyze;
> 
> uniquetest=# explain (costs off, analyze on) select distinct * from t;
>                                 QUERY PLAN                                 
> ---------------------------------------------------------------------------
> Unique (actual time=149.625..245.978 rows=100001 loops=1)
>  ->  Sort (actual time=149.624..199.806 rows=100001 loops=1)
>      Sort Key: a, b, c, d
>      Sort Method: external merge  Disk: 2328kB
>   ->  Seq Scan on t (actual time=0.016..15.597 rows=100001 loops=1)
> Total runtime: 251.272 ms

With this patch, the plan for this query becomes as follows,

> uniquetest=# explain (costs off, analyze on) select distinct * from t;
>                               QUERY PLAN
> ---------------------------------------------------------------------
>  Index Scan using i_t_pkey on t
>                   (actual time=0.019..32.457 rows=100001 loops=1)
>  Total runtime: 37.488 ms

This only seems not so valuable to archive, but with my previous
MergeAppend for UNION patch, this will have more value.

> uniquetest=# create table pu1 (a int, b int, c int, d text);
> uniquetest=# create unique index pu1_pkey_idx on pu1 (a, b);
> uniquetest=# create table cu11 (like pu1 including all) inherits (pu1);
> uniquetest=# create table cu12 (like pu1 including all) inherits (pu1);
> uniquetest=# create table pu2 (like pu1 including all);
> uniquetest=# create table cu21 (like pu1 including all) inherits (pu2);
> uniquetest=# create table cu22 (like pu1 including all) inherits (pu2);
> uniquetest=# insert into cu11 (select a / 5, 4 - (a % 5), a, 'cu11'
>                               from generate_series(000000, 099999) a);
> uniquetest=# insert into cu12 (select a / 5, 4 - (a % 5), a, 'cu12'
>                                from generate_series(100000, 199999) a);
> uniquetest=# insert into cu21 (select a / 5, 4 - (a % 5), a, 'cu21'
>                               from generate_series(150000, 249999) a);
> uniquetest=# insert into cu22 (select a / 5, 4 - (a % 5), a, 'cu22'
>                                from generate_series(250000, 349999) a);
> uniquetest=# explain (analyze on, costs off)
>        select * from pu1 union select * from pu2 order by a, b limit 100;
>                                  QUERY PLAN
> --------------------------------------------------------------------------
>  Limit (actual time=0.226..0.591 rows=100 loops=1)
>   ->  Unique (actual time=0.223..0.561 rows=100 loops=1)
>    ->  Merge Append (actual time=0.223..0.470 rows=100 loops=1)
>        Sort Key: pu1.a, pu1.b, pu1.c, pu1.d
>     ->  Limit (actual time=0.125..0.326 rows=100 loops=1)
>      ->  Unique (actual time=0.125..0.303 rows=100 loops=1)
>       ->  Merge Append (actual time=0.123..0.220 rows=100 loops=1)
>           Sort Key: pu1.a, pu1.b, pu1.c, pu1.d
>        ->  Index Scan using..on pu1 (actual time=0.005..0.005 rows=0 loops=1)
>        ->  Index Scan using..on cu11(actual time=0.071..0.128 rows=100 
> loops=1)
>        ->  Index Scan using..on cu12 (actual time=0.045..0.045 rows=1 loops=1)
>     ->  Limit (actual time=0.096..0.096 rows=1 loops=1)
>      ->  Unique (actual time=0.096..0.096 rows=1 loops=1)
>       ->  Merge Append (actual time=0.094..0.094 rows=1 loops=1)
>           Sort Key: pu2.a, pu2.b, pu2.c, pu2.d
>        ->  Index Scan using..on pu2 (actual time=0.002..0.002 rows=0 loops=1)
>        ->  Index Scan using..on cu21 (actual time=0.047..0.047 rows=1 loops=1)
>        ->  Index Scan using..on cu22 (actual time=0.043..0.043 rows=1 loops=1)
>  Total runtime: 1.987 ms

On the other hand, what we will get from the unpatched PostgreSQL is,

> uniquetest=# explain (analyze on, costs off)
>      select * from pu1 union select * from pu2 order by a, b limit 100;
>                                QUERY PLAN
> -------------------------------------------------------------------------
> Limit (actual time=535.620..535.706 rows=100 loops=1)
>  ->  Unique (actual time=535.618..535.695 rows=100 loops=1)
>   ->  Sort (actual time=535.617..535.637 rows=100 loops=1)
>       Sort Key: pu1.a, pu1.b, pu1.c, pu1.d
>       Sort Method: external sort  Disk: 10568kB
>    ->  Append (actual time=0.012..144.144 rows=400000 loops=1)
>     ->  Append (actual time=0.012..49.570 rows=200000 loops=1)
>      ->  Seq Scan on pu1 (actual time=0.001..0.001 rows=0 loops=1)
>      ->  Seq Scan on cu11 (actual time=0.011..16.202 rows=100000 loops=1)
>      ->  Seq Scan on cu12 (actual time=0.008..16.334 rows=100000 loops=1)
>     ->  Append (actual time=0.010..54.052 rows=200000 loops=1)
>      ->  Seq Scan on pu2 (actual time=0.000..0.000 rows=0 loops=1)
>      ->  Seq Scan on cu21 (actual time=0.009..16.444 rows=100000 loops=1)
>      ->  Seq Scan on cu22 (actual time=0.021..18.923 rows=100000 loops=1)
> Total runtime: 537.765 ms

What do you think about this?


This could be realized by following modifications,

 - Consider a query pathekeys is useful for ordering when a index
   pathkey is a subset of that when the index is UNIQUE.
   (build_index_paths, pathkeys_useful_for_ordering,
   truncate_useless_pathkeys, grouping_planner)

 - Judge a path is orderd or not counting the uniqueness of the
   path.

    - Regard IndexPath on UNIQUE index is reagarded uniquely
      ordered.

    - Regard MergeAppendPath of which all children is uniquely
      orderd as (not necessarily uniquely) ordered.
      (path_is_ordered)

 - Judge the necessity of sorting on above
   premises. (grouping_planner)

Needless to say, theses patches have no effect on any other set
operations than UNION(ALL). They are, INTERSECT(ALL),
EXCEPT(ALL).

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 606734a..bc1ab9a 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -953,7 +953,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 		index_pathkeys = build_index_pathkeys(root, index,
 											  ForwardScanDirection);
 		useful_pathkeys = truncate_useless_pathkeys(root, rel,
-													index_pathkeys);
+													index_pathkeys,
+													index->unique);
 		orderbyclauses = NIL;
 		orderbyclausecols = NIL;
 	}
@@ -1015,7 +1016,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 		index_pathkeys = build_index_pathkeys(root, index,
 											  BackwardScanDirection);
 		useful_pathkeys = truncate_useless_pathkeys(root, rel,
-													index_pathkeys);
+													index_pathkeys,
+													index->unique);
 		if (useful_pathkeys != NIL)
 		{
 			ipath = create_index_path(root, index,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 6724996..87070e7 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -363,6 +363,68 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 }
 
 /*
+ * path_is_uniquely_ordered
+ * Return true if the path is apparently uniquely ordered on the pathkeys.
+ */
+bool
+path_is_uniquely_ordered(Path *path, List *pathkeys)
+{
+	if (pathkeys && path->pathkeys &&
+		IsA(path, IndexPath) &&
+		((IndexPath*)path)->indexinfo->unique &&
+		(pathkeys_contained_in(path->pathkeys, pathkeys)))
+	{
+		return true;
+	}
+	
+	return false;
+}
+
+
+/*
+ * path_is_ordered
+ * Return true if the path is apparently ordered on the pathkeys.  The given
+ * path might be modified so that it will be recognized later as sufficiently
+ * ordered.
+ */
+bool
+path_is_ordered(Path *path, List *pathkeys)
+{
+	if (pathkeys_contained_in(pathkeys, path->pathkeys))
+		return true;
+
+	/* path is obviously ordered when uniquely ordered */
+	if (path_is_uniquely_ordered(path, pathkeys))
+		return true;
+
+	/*
+	 * MergeAppendPath's pathkeys can be replaced by arbitrary pathkeys when
+	 * all subpaths are uniquely ordered on the target pathkeys.
+	 */
+	if (IsA(path, MergeAppendPath))
+	{
+		ListCell *lc;
+		MergeAppendPath *mpath = (MergeAppendPath *)path;
+		
+		foreach (lc, mpath->subpaths)
+		{
+			Path *subpath = (Path *) lfirst(lc);
+			if (!path_is_uniquely_ordered(subpath, pathkeys)) break;
+		}
+		if (!lc)
+		{
+			/*
+			 * This MergeAppendPath is sufficiently ordered on the target
+			 * pathkeys. Reflect that on this path.
+			 */
+			mpath->path.pathkeys = pathkeys;
+			return true;
+		}
+	}
+	return false;
+}
+
+/*
  * get_cheapest_fractional_path_for_pathkeys
  *	  Find the cheapest path (for retrieving a specified fraction of all
  *	  the tuples) that satisfies the given pathkeys and parameterization.
@@ -397,7 +459,7 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
 			compare_fractional_path_costs(matched_path, path, fraction) <= 0)
 			continue;
 
-		if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+		if (path_is_ordered(path, pathkeys) &&
 			bms_is_subset(PATH_REQ_OUTER(path), required_outer))
 			matched_path = path;
 	}
@@ -733,7 +795,7 @@ build_join_pathkeys(PlannerInfo *root,
 	 * contain pathkeys that were useful for forming this joinrel but are
 	 * uninteresting to higher levels.
 	 */
-	return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
+	return truncate_useless_pathkeys(root, joinrel, outer_pathkeys, false);
 }
 
 /****************************************************************************
@@ -1378,9 +1440,12 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
  * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
  * no good to order by just the first key(s) of the requested ordering.
  * So the result is always either 0 or list_length(root->query_pathkeys).
+ * pk_uniq can be true when pathkeys are unique key itself. Tuples are
+ * sufficiently ordered when the pathkeys are the subset of the
+ * query_pathkeys for the case.
  */
 static int
-pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys, bool pk_uniq)
 {
 	if (root->query_pathkeys == NIL)
 		return 0;				/* no special ordering requested */
@@ -1394,23 +1459,34 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 		return list_length(root->query_pathkeys);
 	}
 
+	/*
+	 * Given that the pathkeys compose an unique key, they are useful for
+	 * ordering when they are a sublist of query_pathkeys.
+	 */
+	if (pk_uniq && pathkeys_contained_in(pathkeys, root->query_pathkeys))
+		return list_length(pathkeys);
+
 	return 0;					/* path ordering not useful */
 }
 
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
+ *
+ * When pathkeys_unique is true, reconsider counting the uniqueness even if
+ * the pathkeys are once judged as useless by the general criteria.
  */
 List *
 truncate_useless_pathkeys(PlannerInfo *root,
 						  RelOptInfo *rel,
-						  List *pathkeys)
+						  List *pathkeys,
+						  bool pathkeys_unique)
 {
 	int			nuseful;
 	int			nuseful2;
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
-	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys, pathkeys_unique);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 9b9eb2f..7b1490f 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -809,7 +809,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
 					  numsortkeys * sizeof(bool)) == 0);
 
 		/* Now, insert a Sort node if subplan isn't sufficiently ordered */
-		if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+		if (!path_is_ordered(subpath, pathkeys))
 			subplan = (Plan *) make_sort(root, subplan, numsortkeys,
 										 sortColIdx, sortOperators,
 										 collations, nullsFirst,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 86abdf6..1ce04ac 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -258,6 +258,31 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 	return result;
 }
 
+/*
+ * Check if a path is sufficiently ordered on target pathkeys. This function
+ * is intended to be used as a replace for pathkeys_contained_in() when the
+ * focused path might be uniquely ordered on the current_pathkeys.
+ */
+static bool
+sort_needed(List *current_pathkeys, bool path_is_unique, List *target_pathkeys)
+{
+	/*
+	 * If target pathkeys are a sublist of current pathkeys, the path is
+	 * ordered also on target pathkeys
+	 */
+	if (pathkeys_contained_in(target_pathkeys, current_pathkeys))
+		return false;
+	
+    /*
+	 * If a plan is uniquely ordered on current_pathkeys, it is sufficiently
+	 * ordered on any pathkeys containing it.
+	 */
+	if (path_is_unique && current_pathkeys &&
+		pathkeys_contained_in(current_pathkeys, target_pathkeys))
+		return false;
+
+	return true;
+}
 
 /*--------------------
  * subquery_planner
@@ -1043,6 +1068,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	double		dNumGroups = 0;
 	bool		use_hashed_distinct = false;
 	bool		tested_hashed_distinct = false;
+	bool		result_plan_is_unique = false;
 
 	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
 	if (parse->limitCount || parse->limitOffset)
@@ -1451,18 +1477,24 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 			result_plan = create_plan(root, best_path);
 			current_pathkeys = best_path->pathkeys;
+			result_plan_is_unique =
+				path_is_uniquely_ordered(best_path, root->query_pathkeys);
 
 			/* Detect if we'll need an explicit sort for grouping */
-			if (parse->groupClause && !use_hashed_grouping &&
-			  !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
+			if (parse->groupClause && !use_hashed_grouping)
 			{
-				need_sort_for_grouping = true;
+				if (path_is_ordered(best_path, root->group_pathkeys))
+					current_pathkeys = root->group_pathkeys;
+				else
+				{
+					need_sort_for_grouping = true;
 
-				/*
-				 * Always override create_plan's tlist, so that we don't sort
-				 * useless data from a "physical" tlist.
-				 */
-				need_tlist_eval = true;
+					/*
+					 * Always override create_plan's tlist, so that we don't
+					 * sort useless data from a "physical" tlist.
+					 */
+					need_tlist_eval = true;
+				}
 			}
 
 			/*
@@ -1575,6 +1607,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 									extract_grouping_ops(parse->groupClause),
 												numGroups,
 												result_plan);
+
+				/* Aggregation might break the tuple uniqueness. */
+				result_plan_is_unique = false;
 			}
 			else if (parse->groupClause)
 			{
@@ -1603,7 +1638,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 									extract_grouping_ops(parse->groupClause),
 												  dNumGroups,
 												  result_plan);
-				/* The Group node won't change sort ordering */
+				/*
+				 * The Group node won't change sort ordering, however might
+				 * break the uniqueness.
+				 */
+				result_plan_is_unique = false;
 			}
 			else if (root->hasHavingQual)
 			{
@@ -1713,8 +1752,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 														result_plan,
 														window_pathkeys,
 														-1.0);
-					if (!pathkeys_contained_in(window_pathkeys,
-											   current_pathkeys))
+
+					if (sort_needed(current_pathkeys, result_plan_is_unique,
+									window_pathkeys))
 					{
 						/* we do indeed need to sort */
 						result_plan = (Plan *) sort_plan;
@@ -1770,6 +1810,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 								   wc->startOffset,
 								   wc->endOffset,
 								   result_plan);
+				/* Aggregation might break the tuple uniqueness. */
+				result_plan_is_unique = false;
 			}
 		}
 	}							/* end of if (setOperations) */
@@ -1855,8 +1897,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 				needed_pathkeys = root->sort_pathkeys;
 			else
 				needed_pathkeys = root->distinct_pathkeys;
-
-			if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
+			
+			if (sort_needed(current_pathkeys, result_plan_is_unique,
+							needed_pathkeys))
 			{
 				if (list_length(root->distinct_pathkeys) >=
 					list_length(root->sort_pathkeys))
@@ -1875,8 +1918,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 															   -1.0);
 			}
 
-			result_plan = (Plan *) make_unique(result_plan,
-											   parse->distinctClause);
+			if (!result_plan_is_unique)
+				result_plan = (Plan *) make_unique(result_plan,
+												   parse->distinctClause);
 			result_plan->plan_rows = dNumDistinctRows;
 			/* The Unique node won't change sort ordering */
 		}
@@ -1888,7 +1932,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	 */
 	if (parse->sortClause)
 	{
-		if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
+		if (sort_needed(current_pathkeys, result_plan_is_unique,
+						root->sort_pathkeys))
 		{
 			result_plan = (Plan *) make_sort_from_pathkeys(root,
 														   result_plan,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9ef93c7..8e6d0e7 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -156,6 +156,8 @@ typedef enum
 
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern bool path_is_ordered(Path *path, List *pathkeys);
+extern bool path_is_uniquely_ordered(Path *path, List *pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion);
@@ -190,7 +192,8 @@ extern List *make_inner_pathkeys_for_merge(PlannerInfo *root,
 							  List *outer_pathkeys);
 extern List *truncate_useless_pathkeys(PlannerInfo *root,
 						  RelOptInfo *rel,
-						  List *pathkeys);
+						  List *pathkeys,
+						  bool pathkeys_unique);
 extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
 
 #endif   /* PATHS_H */

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

[HACKERS] Get more from indices.

Reply via email to