From 5f7c5b4a73175e9a7cb7115083e88e8ed3c540a6 Mon Sep 17 00:00:00 2001
From: "dgrowley@gmail.com" <dgrowley@gmail.com>
Date: Fri, 26 Oct 2018 09:18:09 +1300
Subject: [PATCH v1] Allow Append to be used in place of MergeAppend for some
 cases

For RANGE partitioned tables with no sub-partitioned tables and no default
partition the subpaths of a MergeAppend are always arranged in range
order. This means that MergeAppend, when sorting by the partition key or a
leading prefix of the partition key, will always output tuples from earlier
subpaths before later subpaths.  This guarantee means we can just use a
non-parallel Append node instead since this is exactly what Append does.
This gives a nice performance improvement, even for CPU native types which
can be sorted without much effort.
---
 contrib/postgres_fdw/expected/postgres_fdw.out |   6 +-
 src/backend/nodes/list.c                       |  38 ++++++
 src/backend/optimizer/path/allpaths.c          | 158 ++++++++++++++++++++++---
 src/backend/optimizer/path/joinrels.c          |   2 +-
 src/backend/optimizer/path/pathkeys.c          |  62 ++++++++++
 src/backend/optimizer/plan/planner.c           |   3 +-
 src/backend/optimizer/prep/prepunion.c         |   6 +-
 src/backend/optimizer/util/pathnode.c          |  12 +-
 src/include/nodes/pg_list.h                    |   1 +
 src/include/optimizer/pathnode.h               |   2 +-
 src/include/optimizer/paths.h                  |   2 +
 src/test/regress/expected/inherit.out          |  88 +++++++++++++-
 src/test/regress/expected/partition_prune.out  |  64 +++++-----
 src/test/regress/sql/inherit.sql               |  28 +++++
 src/test/regress/sql/partition_prune.sql       |  10 +-
 15 files changed, 408 insertions(+), 74 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 21a2ef5ad3..4888bb7bea 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8397,12 +8397,12 @@ SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1
 --------------------------------------------------------
  Sort
    Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2)
-   ->  Hash Full Join
-         Hash Cond: (t1.a = t2.b)
+   ->  Merge Full Join
+         Merge Cond: (t1.a = t2.b)
          ->  Append
                ->  Foreign Scan on ftprt1_p1 t1
                ->  Foreign Scan on ftprt1_p2 t1_1
-         ->  Hash
+         ->  Materialize
                ->  Append
                      ->  Foreign Scan on ftprt2_p1 t2
                      ->  Foreign Scan on ftprt2_p2 t2_1
diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
index 55fd4c359b..139fae8216 100644
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -1314,6 +1314,44 @@ list_qsort(const List *list, list_qsort_comparator cmp)
 	return newlist;
 }
 
+/*
+ * list_reverse
+ *		Create and return a new shallow copy of 'oldlist', but in reverse order.
+ */
+List *
+list_reverse(const List *oldlist)
+{
+	List	   *newlist;
+	ListCell   *oldlist_cur;
+
+	if (oldlist == NIL)
+		return NIL;				/* an empty list has nothing to reverse */
+
+	newlist = new_list(oldlist->type);
+	newlist->length = oldlist->length;
+
+	/*
+	 * Copy over the data in the first cell to the tail of the new list;
+	 * new_list() has already allocated the tail cell itself.
+	 */
+	newlist->tail->data = oldlist->head->data;
+
+	for_each_cell(oldlist_cur, oldlist->head->next)
+	{
+		ListCell   *newlist_cur;
+
+		newlist_cur = (ListCell *) palloc(sizeof(*newlist_cur));
+		newlist_cur->data = oldlist_cur->data;
+
+		/* push the new cell onto the head, so later cells end up first */
+		newlist_cur->next = newlist->head;
+		newlist->head = newlist_cur;
+	}
+
+	check_list_invariants(newlist);
+	return newlist;
+}
+
 /*
  * Temporary compatibility functions
  *
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 5f74d3b36d..b7219d53ae 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -98,7 +98,8 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 						   List *live_childrels,
 						   List *all_child_pathkeys,
-						   List *partitioned_rels);
+						   List *partitioned_rels,
+						   bool try_ordered_append);
 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
 									  RelOptInfo *rel,
 									  Relids required_outer);
@@ -1381,6 +1382,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 	ListCell   *l;
 	List	   *partitioned_rels = NIL;
 	double		partial_rows = -1;
+	bool		hassubparts = false;
 
 	/* If appropriate, consider parallel append */
 	pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;
@@ -1444,6 +1446,10 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		ListCell   *lcp;
 		Path	   *cheapest_partial_path = NULL;
 
+		/* Record if there are any children which are partitioned tables. */
+		if (childrel->part_scheme)
+			hassubparts = true;
+
 		/*
 		 * For UNION ALLs with non-empty partitioned_child_rels, accumulate
 		 * the Lists of child relations.
@@ -1597,7 +1603,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 	 */
 	if (subpaths_valid)
 		add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
-												  NULL, 0, false,
+												  NIL, NULL, 0, false,
 												  partitioned_rels, -1));
 
 	/*
@@ -1639,7 +1645,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 
 		/* Generate a partial append path. */
 		appendpath = create_append_path(root, rel, NIL, partial_subpaths,
-										NULL, parallel_workers,
+										NIL, NULL, parallel_workers,
 										enable_parallel_append,
 										partitioned_rels, -1);
 
@@ -1689,7 +1695,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 
 		appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
 										pa_partial_subpaths,
-										NULL, parallel_workers, true,
+										NIL, NULL, parallel_workers, true,
 										partitioned_rels, partial_rows);
 		add_partial_path(rel, (Path *) appendpath);
 	}
@@ -1699,9 +1705,24 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 	 * list of child pathkeys.
 	 */
 	if (subpaths_valid)
+	{
+		bool	try_ordered_append;
+
+		/*
+		 * We'll attempt to use simple Appends in place of MergeAppends for
+		 * partitioned tables which guarantee that an earlier partition contains
+		 * earlier tuples.  We only do this for base tables, as sub-partitions'
+		 * paths are flattened into the base table's append paths.
+		 */
+		try_ordered_append = !hassubparts &&
+							 rel->reloptkind == RELOPT_BASEREL &&
+							 rel->part_scheme != NULL;
+
 		generate_mergeappend_paths(root, rel, live_childrels,
 								   all_child_pathkeys,
-								   partitioned_rels);
+								   partitioned_rels,
+								   try_ordered_append);
+	}
 
 	/*
 	 * Build Append paths for each parameterization seen among the child rels.
@@ -1751,7 +1772,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 		if (subpaths_valid)
 			add_path(rel, (Path *)
 					 create_append_path(root, rel, subpaths, NIL,
-										required_outer, 0, false,
+										NIL, required_outer, 0, false,
 										partitioned_rels, -1));
 	}
 }
@@ -1778,14 +1799,48 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
  * parameterized mergejoin plans, it might be worth adding support for
  * parameterized MergeAppends to feed such joins.  (See notes in
  * optimizer/README for why that might not ever happen, though.)
+ *
+ * 'try_ordered_append' can be passed as true to have the function attempt
+ * to use an Append node in place of a MergeAppend node. Callers must ensure
+ * that 'rel' is a partitioned table which contains no live sub-partitioned
+ * tables.
  */
 static void
 generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 						   List *live_childrels,
 						   List *all_child_pathkeys,
-						   List *partitioned_rels)
+						   List *partitioned_rels,
+						   bool try_ordered_append)
 {
 	ListCell   *lcp;
+	List	   *partition_pathkeys = NIL;
+	List	   *partition_pathkeys_desc = NIL;
+
+	/*
+	 * Some partitioned table setups may allow us to use an Append node
+	 * instead of a MergeAppend.  This is possible in cases such as RANGE
+	 * partitioned tables where it's guaranteed that an earlier partition must
+	 * contain rows which come earlier in the sort order.  Ideally, all of the
+	 * logic to determine when this is possible would be defined in
+	 * build_partition_pathkeys(), but one reason where we must disable this
+	 * is when there are sub-partitioned tables.  We can't enable the
+	 * optimization in this case due to how we flatten MergeAppend subnodes.
+	 * It would be possible to work around this if we disabled the flattening
+	 * for this case, but it currently seems like more trouble than it's
+	 * worth.  The check for sub-partitioned tables is cheaper to implement in
+	 * the calling function since it's likely to have just processed the
+	 * live_children list and could have checked for sub-partitioned tables
+	 * along the way.  We let build_partition_pathkeys() handle the remaining
+	 * checks.
+	 */
+	if (try_ordered_append)
+	{
+		partition_pathkeys = build_partition_pathkeys(root, rel,
+													  ForwardScanDirection);
+		partition_pathkeys_desc = build_partition_pathkeys(root, rel,
+													BackwardScanDirection);
+	}
+
 
 	foreach(lcp, all_child_pathkeys)
 	{
@@ -1842,20 +1897,89 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 									  &total_subpaths, NULL);
 		}
 
-		/* ... and build the MergeAppend paths */
-		add_path(rel, (Path *) create_merge_append_path(root,
-														rel,
-														startup_subpaths,
-														pathkeys,
-														NULL,
-														partitioned_rels));
-		if (startup_neq_total)
+		/*
+		 * When the required pathkeys are a prefix of the partitioned table's
+		 * pathkeys, then there's no need to perform a MergeAppend. We're
+		 * already scanning the partitions in order so a simple Append will
+		 * suffice.  This has performance benefits during query execution.
+		 */
+		if (pathkeys_contained_in(pathkeys, partition_pathkeys))
+		{
+			add_path(rel, (Path *) create_append_path(root,
+													  rel,
+													  startup_subpaths,
+													  NIL,
+													  pathkeys,
+													  NULL,
+													  0,
+													  false,
+													  partitioned_rels,
+													  -1));
+			if (startup_neq_total)
+				add_path(rel, (Path *) create_append_path(root,
+														  rel,
+														  total_subpaths,
+														  NIL,
+														  pathkeys,
+														  NULL,
+														  0,
+														  false,
+														  partitioned_rels,
+														  -1));
+
+		}
+
+		/*
+		 * Perhaps the pathkeys match if we were to scan the partitions in
+		 * reverse order?
+		 */
+		else if (pathkeys_contained_in(pathkeys, partition_pathkeys_desc))
+		{
+			/*
+			 * XXX worth caching the reverse Lists? Perhaps it's unlikely that
+			 * there's more than 1 matching path.
+			 */
+			add_path(rel, (Path *) create_append_path(root,
+													  rel,
+											list_reverse(startup_subpaths),
+													  NIL,
+													  pathkeys,
+													  NULL,
+													  0,
+													  false,
+													  partitioned_rels,
+													  -1));
+			if (startup_neq_total)
+				add_path(rel, (Path *) create_append_path(root,
+														  rel,
+												list_reverse(total_subpaths),
+														  NIL,
+														  pathkeys,
+														  NULL,
+														  0,
+														  false,
+														  partitioned_rels,
+														  -1));
+
+		}
+
+		else
+		{
+			/* ... and build the MergeAppend paths */
 			add_path(rel, (Path *) create_merge_append_path(root,
 															rel,
-															total_subpaths,
+															startup_subpaths,
 															pathkeys,
 															NULL,
 															partitioned_rels));
+			if (startup_neq_total)
+				add_path(rel, (Path *) create_merge_append_path(root,
+																rel,
+																total_subpaths,
+																pathkeys,
+																NULL,
+																partitioned_rels));
+		}
 	}
 }
 
@@ -2016,7 +2140,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
 	rel->pathlist = NIL;
 	rel->partial_pathlist = NIL;
 
-	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL,
+	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NIL, NULL,
 											  0, false, NIL, -1));
 
 	/*
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index d3d21fed5d..2f9fc50bf2 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -1231,7 +1231,7 @@ mark_dummy_rel(RelOptInfo *rel)
 	rel->partial_pathlist = NIL;
 
 	/* Set up the dummy path */
-	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL,
+	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NIL, NULL,
 											  0, false, NIL, -1));
 
 	/* Set or update cheapest_total_path and related fields */
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..cdee4a6ec2 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -25,6 +25,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
+#include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
 
@@ -547,6 +548,67 @@ build_index_pathkeys(PlannerInfo *root,
 	return retval;
 }
 
+/*
+ * build_partition_pathkeys
+ *	  Build a pathkeys list that describes the ordering induced by the
+ *	  partitions of 'partrel' when scanned in 'scandir' order.  (Note that
+ *	  for partitions that don't have a natural ordering, we return NIL.)
+ */
+List *
+build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
+						 ScanDirection scandir)
+{
+	PartitionScheme partscheme;
+	List	   *retval;
+	int			i;
+
+	/*
+	 * Only RANGE partitioning with no DEFAULT partition guarantees that the
+	 * partitions can be scanned in the order that they're defined in the
+	 * PartitionDesc to provide non-overlapping ranges of tuples.
+	 */
+	if (partrel->boundinfo->strategy != PARTITION_STRATEGY_RANGE ||
+		partition_bound_has_default(partrel->boundinfo))
+		return NIL;
+
+	retval = NIL;
+	partscheme = partrel->part_scheme;
+
+	for (i = 0; i < partscheme->partnatts; i++)
+	{
+		PathKey    *cpathkey;
+		Expr	   *keyCol = linitial(partrel->partexprs[i]);
+
+		/*
+		 * OK, try to make a canonical pathkey for this part key.  Note we're
+		 * underneath any outer joins, so nullable_relids should be NULL.
+		 *
+		 * Currently pass nulls_first according to the scan direction.  This
+		 * will cause the order not to match when NULLS LAST is specified.
+		 * We're missing an optimization opportunity here since no NULLs can
+		 * exist due to us requiring above that no DEFAULT partition exists,
+		 * which is the only place NULLs could be stored. Likely this is not
+		 * worth worrying about since we'd miss the same opportunity for a
+		 * table with a NOT NULL constraint.
+		 */
+		cpathkey = make_pathkey_from_sortinfo(root,
+											  keyCol,
+											  NULL,
+											  partscheme->partopfamily[i],
+											  partscheme->partopcintype[i],
+											  partscheme->partcollation[i],
+											 ScanDirectionIsBackward(scandir),
+											 ScanDirectionIsBackward(scandir),
+											  0,
+											  partrel->relids,
+											  false);
+		/* NOTE(review): no NULL check on cpathkey before appending -- confirm */
+		retval = lappend(retval, cpathkey);
+	}
+
+	return retval;
+}
+
 /*
  * build_expression_pathkey
  *	  Build a pathkeys list that describes an ordering by a single expression
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index c729a99f8b..78b834032d 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3899,6 +3899,7 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 							   grouped_rel,
 							   paths,
 							   NIL,
+							   NIL,
 							   NULL,
 							   0,
 							   false,
@@ -6878,7 +6879,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
 		 * node, which would cause this relation to stop appearing to be a
 		 * dummy rel.)
 		 */
-		rel->pathlist = list_make1(create_append_path(root, rel, NIL, NIL,
+		rel->pathlist = list_make1(create_append_path(root, rel, NIL, NIL, NIL,
 													  NULL, 0, false, NIL,
 													  -1));
 		rel->partial_pathlist = NIL;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index d5720518a8..6d4657a4c1 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -656,7 +656,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	/*
 	 * Append the child results together.
 	 */
-	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+	path = (Path *) create_append_path(root, result_rel, pathlist, NIL, NIL,
 									   NULL, 0, false, NIL, -1);
 
 	/*
@@ -711,7 +711,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		Assert(parallel_workers > 0);
 
 		ppath = (Path *)
-			create_append_path(root, result_rel, NIL, partial_pathlist,
+			create_append_path(root, result_rel, NIL, partial_pathlist, NIL,
 							   NULL, parallel_workers, enable_parallel_append,
 							   NIL, -1);
 		ppath = (Path *)
@@ -822,7 +822,7 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	/*
 	 * Append the child results together.
 	 */
-	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
+	path = (Path *) create_append_path(root, result_rel, pathlist, NIL, NIL,
 									   NULL, 0, false, NIL, -1);
 
 	/* Identify the grouping semantics */
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index d50d86b252..dbbf81f0ac 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1219,7 +1219,7 @@ AppendPath *
 create_append_path(PlannerInfo *root,
 				   RelOptInfo *rel,
 				   List *subpaths, List *partial_subpaths,
-				   Relids required_outer,
+				   List *pathkeys, Relids required_outer,
 				   int parallel_workers, bool parallel_aware,
 				   List *partitioned_rels, double rows)
 {
@@ -1253,7 +1253,7 @@ create_append_path(PlannerInfo *root,
 	pathnode->path.parallel_aware = parallel_aware;
 	pathnode->path.parallel_safe = rel->consider_parallel;
 	pathnode->path.parallel_workers = parallel_workers;
-	pathnode->path.pathkeys = NIL;	/* result is always considered unsorted */
+	pathnode->path.pathkeys = pathkeys;
 	pathnode->partitioned_rels = list_copy(partitioned_rels);
 
 	/*
@@ -1263,10 +1263,14 @@ create_append_path(PlannerInfo *root,
 	 * costs.  There may be some paths that require to do startup work by a
 	 * single worker.  In such case, it's better for workers to choose the
 	 * expensive ones first, whereas the leader should choose the cheapest
-	 * startup plan.
+	 * startup plan.  Note: We mustn't fiddle with the order of subpaths
+	 * when the Append has valid pathkeys.  The order they're listed in
+	 * is critical to keeping the pathkeys valid.
 	 */
 	if (pathnode->path.parallel_aware)
 	{
+		Assert(pathkeys == NIL);
+
 		subpaths = list_qsort(subpaths, append_total_cost_compare);
 		partial_subpaths = list_qsort(partial_subpaths,
 									  append_startup_cost_compare);
@@ -3587,7 +3591,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
 				}
 				return (Path *)
 					create_append_path(root, rel, childpaths, partialpaths,
-									   required_outer,
+									   NIL, required_outer,
 									   apath->path.parallel_workers,
 									   apath->path.parallel_aware,
 									   apath->partitioned_rels,
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index e6cd2cdfba..83e8b62a55 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -271,6 +271,7 @@ extern List *list_copy_tail(const List *list, int nskip);
 
 typedef int (*list_qsort_comparator) (const void *a, const void *b);
 extern List *list_qsort(const List *list, list_qsort_comparator cmp);
+extern List *list_reverse(const List *list);
 
 /*
  * To ease migration to the new list API, a set of compatibility
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 81abcf53a8..5a790cf6be 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -65,7 +65,7 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
 extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
 					List *tidquals, Relids required_outer);
 extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
-				   List *subpaths, List *partial_subpaths,
+				   List *subpaths, List *partial_subpaths, List *pathkeys,
 				   Relids required_outer,
 				   int parallel_workers, bool parallel_aware,
 				   List *partitioned_rels, double rows);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index cafde307ad..ee958a0f07 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -201,6 +201,8 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
 extern Path *get_cheapest_parallel_safe_total_inner(List *paths);
 extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index,
 					 ScanDirection scandir);
+extern List *build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
+						 ScanDirection scandir);
 extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr,
 						 Relids nullable_relids, Oid opno,
 						 Relids rel, bool create_it);
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 4f29d9f891..0583d60d5c 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -2032,6 +2032,86 @@ explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mc
          Filter: ((c > 20) AND (a = 20))
 (9 rows)
 
+-- Test code that uses Append nodes in place of MergeAppend when the
+-- partitions guarantee earlier partitions means lower sort order of the
+-- tuples contained within.
+create index on mcrparted (a, abs(b), c);
+-- check MergeAppend is uses when a default partition exists
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Merge Append
+   Sort Key: mcrparted0.a, (abs(mcrparted0.b)), mcrparted0.c
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Index Scan using mcrparted5_a_abs_c_idx on mcrparted5
+   ->  Index Scan using mcrparted_def_a_abs_c_idx on mcrparted_def
+(9 rows)
+
+drop table mcrparted_def;
+-- check Append is used for RANGE partitioned table with no default and no subpartitions
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Append
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Index Scan using mcrparted5_a_abs_c_idx on mcrparted5
+(7 rows)
+
+-- check Append is used with subpaths in reverse order with backwards index scans.
+explain (costs off) select * from mcrparted order by a desc, abs(b) desc, c desc;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Append
+   ->  Index Scan Backward using mcrparted5_a_abs_c_idx on mcrparted5
+   ->  Index Scan Backward using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Index Scan Backward using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan Backward using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan Backward using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan Backward using mcrparted0_a_abs_c_idx on mcrparted0
+(7 rows)
+
+-- check that Append is not used when there are live subpartitioned tables
+drop table mcrparted5;
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue) partition by list (a);
+create table mcrparted5a partition of mcrparted5 for values in(20);
+create table mcrparted5_def partition of mcrparted5 default;
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Merge Append
+   Sort Key: mcrparted0.a, (abs(mcrparted0.b)), mcrparted0.c
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+   ->  Index Scan using mcrparted4_a_abs_c_idx on mcrparted4
+   ->  Index Scan using mcrparted5a_a_abs_c_idx on mcrparted5a
+   ->  Index Scan using mcrparted5_def_a_abs_c_idx on mcrparted5_def
+(9 rows)
+
+-- check that Append is used when the sub-partitioned tables are pruned during planning.
+explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Append
+   ->  Index Scan using mcrparted0_a_abs_c_idx on mcrparted0
+         Index Cond: (a < 20)
+   ->  Index Scan using mcrparted1_a_abs_c_idx on mcrparted1
+         Index Cond: (a < 20)
+   ->  Index Scan using mcrparted2_a_abs_c_idx on mcrparted2
+         Index Cond: (a < 20)
+   ->  Index Scan using mcrparted3_a_abs_c_idx on mcrparted3
+         Index Cond: (a < 20)
+(9 rows)
+
 drop table mcrparted;
 -- check that partitioned table Appends cope with being referenced in
 -- subplans
@@ -2045,17 +2125,15 @@ explain (costs off) select min(a), max(a) from parted_minmax where b = '12345';
  Result
    InitPlan 1 (returns $0)
      ->  Limit
-           ->  Merge Append
-                 Sort Key: parted_minmax1.a
+           ->  Append
                  ->  Index Only Scan using parted_minmax1i on parted_minmax1
                        Index Cond: ((a IS NOT NULL) AND (b = '12345'::text))
    InitPlan 2 (returns $1)
      ->  Limit
-           ->  Merge Append
-                 Sort Key: parted_minmax1_1.a DESC
+           ->  Append
                  ->  Index Only Scan Backward using parted_minmax1i on parted_minmax1 parted_minmax1_1
                        Index Cond: ((a IS NOT NULL) AND (b = '12345'::text))
-(13 rows)
+(11 rows)
 
 select min(a), max(a) from parted_minmax where b = '12345';
  min | max 
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 24313e8c78..d7c268c5af 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -3013,14 +3013,14 @@ drop table boolp;
 --
 set enable_seqscan = off;
 set enable_sort = off;
-create table ma_test (a int) partition by range (a);
+create table ma_test (a int, b int) partition by range (a);
 create table ma_test_p1 partition of ma_test for values from (0) to (10);
 create table ma_test_p2 partition of ma_test for values from (10) to (20);
 create table ma_test_p3 partition of ma_test for values from (20) to (30);
-insert into ma_test select x from generate_series(0,29) t(x);
-create index on ma_test (a);
+insert into ma_test select x,x from generate_series(0,29) t(x);
+create index on ma_test (b);
 analyze ma_test;
-prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a;
+prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b;
 -- Execute query 5 times to allow choose_custom_plan
 -- to start considering a generic plan.
 execute mt_q1(0);
@@ -3067,17 +3067,15 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(15);
                                   QUERY PLAN                                   
 -------------------------------------------------------------------------------
  Merge Append (actual rows=2 loops=1)
-   Sort Key: ma_test_p2.a
+   Sort Key: ma_test_p2.b
    Subplans Removed: 1
-   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1)
-         Index Cond: (a >= $1)
-         Filter: ((a % 10) = 5)
-         Rows Removed by Filter: 4
-   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1)
-         Index Cond: (a >= $1)
-         Filter: ((a % 10) = 5)
+   ->  Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=1 loops=1)
+         Filter: ((a >= $1) AND ((a % 10) = 5))
          Rows Removed by Filter: 9
-(11 rows)
+   ->  Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=1 loops=1)
+         Filter: ((a >= $1) AND ((a % 10) = 5))
+         Rows Removed by Filter: 9
+(9 rows)
 
 execute mt_q1(15);
  a  
@@ -3090,13 +3088,12 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(25);
                                   QUERY PLAN                                   
 -------------------------------------------------------------------------------
  Merge Append (actual rows=1 loops=1)
-   Sort Key: ma_test_p3.a
+   Sort Key: ma_test_p3.b
    Subplans Removed: 2
-   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1)
-         Index Cond: (a >= $1)
-         Filter: ((a % 10) = 5)
-         Rows Removed by Filter: 4
-(7 rows)
+   ->  Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=1 loops=1)
+         Filter: ((a >= $1) AND ((a % 10) = 5))
+         Rows Removed by Filter: 9
+(6 rows)
 
 execute mt_q1(25);
  a  
@@ -3109,12 +3106,11 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(35);
                                QUERY PLAN                               
 ------------------------------------------------------------------------
  Merge Append (actual rows=0 loops=1)
-   Sort Key: ma_test_p1.a
+   Sort Key: ma_test_p1.b
    Subplans Removed: 2
-   ->  Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed)
-         Index Cond: (a >= $1)
-         Filter: ((a % 10) = 5)
-(6 rows)
+   ->  Index Scan using ma_test_p1_b_idx on ma_test_p1 (never executed)
+         Filter: ((a >= $1) AND ((a % 10) = 5))
+(5 rows)
 
 execute mt_q1(35);
  a 
@@ -3123,23 +3119,23 @@ execute mt_q1(35);
 
 deallocate mt_q1;
 -- ensure initplan params properly prune partitions
-explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a;
+explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b;
                                                  QUERY PLAN                                                 
 ------------------------------------------------------------------------------------------------------------
  Merge Append (actual rows=20 loops=1)
-   Sort Key: ma_test_p1.a
+   Sort Key: ma_test_p1.b
    InitPlan 2 (returns $1)
      ->  Result (actual rows=1 loops=1)
            InitPlan 1 (returns $0)
              ->  Limit (actual rows=1 loops=1)
-                   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1)
-                         Index Cond: (a IS NOT NULL)
-   ->  Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed)
-         Index Cond: (a >= $1)
-   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1)
-         Index Cond: (a >= $1)
-   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1)
-         Index Cond: (a >= $1)
+                   ->  Index Scan using ma_test_p2_b_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1)
+                         Index Cond: (b IS NOT NULL)
+   ->  Index Scan using ma_test_p1_b_idx on ma_test_p1 (never executed)
+         Filter: (a >= $1)
+   ->  Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=10 loops=1)
+         Filter: (a >= $1)
+   ->  Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=10 loops=1)
+         Filter: (a >= $1)
 (14 rows)
 
 reset enable_seqscan;
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index a6e541d4da..889e907c2e 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -721,6 +721,34 @@ explain (costs off) select * from mcrparted where abs(b) = 5;	-- scans all parti
 explain (costs off) select * from mcrparted where a > -1;	-- scans all partitions
 explain (costs off) select * from mcrparted where a = 20 and abs(b) = 10 and c > 10;	-- scans mcrparted4
 explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mcrparted3, mcrparte4, mcrparte5, mcrparted_def
+
+-- Test code that uses Append nodes in place of MergeAppend when the
+-- partitions guarantee earlier partitions means lower sort order of the
+-- tuples contained within.
+create index on mcrparted (a, abs(b), c);
+
+-- check MergeAppend is uses when a default partition exists
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+
+drop table mcrparted_def;
+
+-- check Append is used for RANGE partitioned table with no default and no subpartitions
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+
+-- check Append is used with subpaths in reverse order with backwards index scans.
+explain (costs off) select * from mcrparted order by a desc, abs(b) desc, c desc;
+
+-- check that Append is not used when there are live subpartitioned tables
+drop table mcrparted5;
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue) partition by list (a);
+create table mcrparted5a partition of mcrparted5 for values in(20);
+create table mcrparted5_def partition of mcrparted5 default;
+
+explain (costs off) select * from mcrparted order by a, abs(b), c;
+
+-- check that Append is used when the sub-partitioned tables are pruned during planning.
+explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c;
+
 drop table mcrparted;
 
 -- check that partitioned table Appends cope with being referenced in
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index eca1a7c5ac..a834afd572 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -740,15 +740,15 @@ drop table boolp;
 --
 set enable_seqscan = off;
 set enable_sort = off;
-create table ma_test (a int) partition by range (a);
+create table ma_test (a int, b int) partition by range (a);
 create table ma_test_p1 partition of ma_test for values from (0) to (10);
 create table ma_test_p2 partition of ma_test for values from (10) to (20);
 create table ma_test_p3 partition of ma_test for values from (20) to (30);
-insert into ma_test select x from generate_series(0,29) t(x);
-create index on ma_test (a);
+insert into ma_test select x,x from generate_series(0,29) t(x);
+create index on ma_test (b);
 
 analyze ma_test;
-prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a;
+prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b;
 
 -- Execute query 5 times to allow choose_custom_plan
 -- to start considering a generic plan.
@@ -769,7 +769,7 @@ execute mt_q1(35);
 deallocate mt_q1;
 
 -- ensure initplan params properly prune partitions
-explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a;
+explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b;
 
 reset enable_seqscan;
 reset enable_sort;
-- 
2.16.2.windows.1