diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index b5cab0c..ee871d6 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -40,7 +40,7 @@ typedef struct
 } Clump;
 
 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
-			bool force);
+			int num_gene, bool force);
 static bool desirable_join(PlannerInfo *root,
 			   RelOptInfo *outer_rel, RelOptInfo *inner_rel);
 
@@ -196,7 +196,7 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
 		cur_clump->size = 1;
 
 		/* Merge it into the clumps list, using only desirable joins */
-		clumps = merge_clump(root, clumps, cur_clump, false);
+		clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
 	}
 
 	if (list_length(clumps) > 1)
@@ -210,7 +210,7 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
 		{
 			Clump	   *clump = (Clump *) lfirst(lc);
 
-			fclumps = merge_clump(root, fclumps, clump, true);
+			fclumps = merge_clump(root, fclumps, clump, num_gene, true);
 		}
 		clumps = fclumps;
 	}
@@ -235,7 +235,8 @@ gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
  * "desirable" joins.
  */
 static List *
-merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
+merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
+			bool force)
 {
 	ListCell   *prev;
 	ListCell   *lc;
@@ -265,7 +266,13 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
 			if (joinrel)
 			{
 				/* Create GatherPaths for any useful partial paths for rel */
-				generate_gather_paths(root, joinrel);
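+				/*
+				 * Don't consider gathering the topmost join rel here; gather
+				 * paths for it are built in grouping_planner, once the final
+				 * scan/join target is available.
+				 */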
+				if (old_clump->size + new_clump->size < num_gene)
+					generate_gather_paths(root, joinrel, NULL);
 
 				/* Find and save the cheapest paths for this joinrel */
 				set_cheapest(joinrel);
@@ -283,7 +290,7 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
 				 * others.  When no further merge is possible, we'll reinsert
 				 * it into the list.
 				 */
-				return merge_clump(root, clumps, old_clump, force);
+				return merge_clump(root, clumps, old_clump, num_gene, force);
 			}
 		}
 		prev = lc;
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 2d7e1d8..20c7b21 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -479,14 +479,18 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	}
 
 	/*
-	 * If this is a baserel, consider gathering any partial paths we may have
-	 * created for it.  (If we tried to gather inheritance children, we could
-	 * end up with a very large number of gather nodes, each trying to grab
-	 * its own pool of workers, so don't do this for otherrels.  Instead,
-	 * we'll consider gathering partial paths for the parent appendrel.)
+	 * If this is a baserel and not the only rel, consider gathering any
+	 * partial paths we may have created for it.  (If we tried to gather
+	 * inheritance children, we could end up with a very large number of
+	 * gather nodes, each trying to grab its own pool of workers, so don't
+	 * do this for otherrels.  Instead, we'll consider gathering partial paths
+	 * for the parent appendrel.)
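+	 *
+	 * In the single-rel case, gather paths are built later, in
+	 * grouping_planner, once the final scan/join target is available.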
 	 */
-	if (rel->reloptkind == RELOPT_BASEREL)
-		generate_gather_paths(root, rel);
+	if (rel->reloptkind == RELOPT_BASEREL && root->simple_rel_array_size > 2)
+		generate_gather_paths(root, rel, NULL);
 
 	/*
 	 * Allow a plugin to editorialize on the set of Paths for this base
@@ -2192,14 +2196,21 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  *		Gather Merge on top of a partial path.
  *
  * This must not be called until after we're done creating all partial paths
- * for the specified relation.  (Otherwise, add_partial_path might delete a
- * path that some GatherPath or GatherMergePath has a reference to.)
+ * for the specified relation.  (Otherwise, add_partial_path might delete a
+ * path that some GatherPath or GatherMergePath has a reference to.)  For the
+ * topmost scan/join relation, the final pathtarget must also be available.
+ *
+ * If a target is supplied, we also try to push it down to the Gather's
+ * input.  We do this here rather than in apply_projection_to_path because it
+ * lets us account for the fact that the target evaluation can be performed
+ * by the workers whenever that is safe.
  */
 void
-generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
+generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, PathTarget *target)
 {
 	Path	   *cheapest_partial_path;
 	Path	   *simple_gather_path;
+	QualCost	oldcost;
 	ListCell   *lc;
 
 	/* If there are no partial paths, there's nothing to do here. */
@@ -2215,6 +2226,64 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
 	simple_gather_path = (Path *)
 		create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
 						   NULL, NULL);
+
+	/*
+	 * We override the final target list into the gather path and update its
+	 * If a final target was supplied, install it into the Gather path and
+	 * update the path's cost estimates accordingly.
+	if (target && simple_gather_path->pathtarget != target)
+	{
+		oldcost = simple_gather_path->pathtarget->cost;
+		simple_gather_path->pathtarget = target;
+
+		if (is_parallel_safe(root, (Node *) target->exprs))
+		{
+			GatherPath *gpath = (GatherPath *) simple_gather_path;
+
+			simple_gather_path->startup_cost += target->cost.startup - oldcost.startup;
+
+			/*
+			 * We always use create_projection_path here, even if the subpath is
+			 * projection-capable, so as to avoid modifying the subpath in place.
+			 * It seems unlikely at present that there could be any other
+			 * references to the subpath, but better safe than sorry.
+			 */
+			gpath->subpath = (Path *)
+				create_projection_path(root,
+									   gpath->subpath->parent,
+									   gpath->subpath,
+									   target);
+
+			/*
+			 * Adjust the cost of GatherPath to reflect the fact that the bulk of
+			 * the target evaluation will happen in workers.
+			 */
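+			/*
+			 * If the subpath can project the new target itself (dummypp), the
+			 * workers absorb just the per-tuple cost difference; otherwise a
+			 * Result node is interposed, charging cpu_tuple_cost plus the new
+			 * target's per-tuple cost for each row it emits.
+			 */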
+			if (((ProjectionPath *) gpath->subpath)->dummypp)
+				simple_gather_path->total_cost += target->cost.startup - oldcost.startup +
+							(target->cost.per_tuple - oldcost.per_tuple) * gpath->subpath->rows;
+			else
+				simple_gather_path->total_cost += target->cost.startup - oldcost.startup +
+							(cpu_tuple_cost + target->cost.per_tuple) * gpath->subpath->rows;
+		}
+		else
+		{
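+			/*
+			 * The target list isn't parallel-safe, so the workers can't help
+			 * evaluate it; just adjust this path's costs the same way
+			 * apply_projection_to_path does.
+			 */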
+			simple_gather_path->startup_cost += target->cost.startup - oldcost.startup;
+			simple_gather_path->total_cost += target->cost.startup - oldcost.startup +
+				(target->cost.per_tuple - oldcost.per_tuple) * simple_gather_path->rows;
+		}
+	}
+
 	add_path(rel, simple_gather_path);
 
 	/*
@@ -2224,14 +2293,18 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
 	foreach(lc, rel->partial_pathlist)
 	{
 		Path	   *subpath = (Path *) lfirst(lc);
-		GatherMergePath *path;
+		Path	   *path;
 
 		if (subpath->pathkeys == NIL)
 			continue;
 
-		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
+		path = (Path *) create_gather_merge_path(root, rel, subpath, rel->reltarget,
 										subpath->pathkeys, NULL, NULL);
-		add_path(rel, &path->path);
+		/* Add projection step if needed */
+		if (target && path->pathtarget != target)
+			path = apply_projection_to_path(root, rel, path, target);
+
+		add_path(rel, path);
 	}
 }
 
@@ -2397,7 +2470,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 			rel = (RelOptInfo *) lfirst(lc);
 
 			/* Create GatherPaths for any useful partial paths for rel */
-			generate_gather_paths(root, rel);
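+			/*
+			 * Skip the topmost scan/join rel; gather paths for it are built
+			 * in grouping_planner, once the final scan/join target is
+			 * available.
+			 */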
+			if (lev < levels_needed)
+				generate_gather_paths(root, rel, NULL);
 
 			/* Find and save the cheapest paths for this rel */
 			set_cheapest(rel);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fdef00a..1b891c1 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1812,6 +1812,18 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		}
 
 		/*
+		 * Consider ways to implement parallel paths.  We deliberately delay
+		 * generating gather paths for the topmost scan/join rel until its
+		 * pathtarget has been computed, so that their cost estimates can
+		 * account for the fact that most of the target evaluation work
+		 * will be performed in the workers.
+		 */
+		generate_gather_paths(root, current_rel, scanjoin_target);
+
+		/* Set or update cheapest_total_path and related fields */
+		set_cheapest(current_rel);
+
+		/*
 		 * Upper planning steps which make use of the top scan/join rel's
 		 * partial pathlist will expect partial paths for that rel to produce
 		 * the same output as complete paths ... and we just changed the
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 26567cb..0bc7b09 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2354,10 +2354,6 @@ create_projection_path(PlannerInfo *root,
  * knows that the given path isn't referenced elsewhere and so can be modified
  * in-place.
  *
- * If the input path is a GatherPath, we try to push the new target down to
- * its input as well; this is a yet more invasive modification of the input
- * path, which create_projection_path() can't do.
- *
  * Note that we mustn't change the source path's parent link; so when it is
  * add_path'd to "rel" things will be a bit inconsistent.  So far that has
  * not caused any trouble.
@@ -2392,35 +2388,8 @@ apply_projection_to_path(PlannerInfo *root,
 	path->total_cost += target->cost.startup - oldcost.startup +
 		(target->cost.per_tuple - oldcost.per_tuple) * path->rows;
 
-	/*
-	 * If the path happens to be a Gather path, we'd like to arrange for the
-	 * subpath to return the required target list so that workers can help
-	 * project.  But if there is something that is not parallel-safe in the
-	 * target expressions, then we can't.
-	 */
-	if (IsA(path, GatherPath) &&
-		is_parallel_safe(root, (Node *) target->exprs))
-	{
-		GatherPath *gpath = (GatherPath *) path;
-
-		/*
-		 * We always use create_projection_path here, even if the subpath is
-		 * projection-capable, so as to avoid modifying the subpath in place.
-		 * It seems unlikely at present that there could be any other
-		 * references to the subpath, but better safe than sorry.
-		 *
-		 * Note that we don't change the GatherPath's cost estimates; it might
-		 * be appropriate to do so, to reflect the fact that the bulk of the
-		 * target evaluation will happen in workers.
-		 */
-		gpath->subpath = (Path *)
-			create_projection_path(root,
-								   gpath->subpath->parent,
-								   gpath->subpath,
-								   target);
-	}
-	else if (path->parallel_safe &&
-			 !is_parallel_safe(root, (Node *) target->exprs))
+	if (path->parallel_safe &&
+		!is_parallel_safe(root, (Node *) target->exprs))
 	{
 		/*
 		 * We're inserting a parallel-restricted target list into a path
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 4e06b2e..a4ba769 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -53,7 +53,8 @@ extern void set_dummy_rel_pathlist(RelOptInfo *rel);
 extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
 					 List *initial_rels);
 
-extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
+extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel,
+					PathTarget *target);
 extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
 						double index_pages);
 extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
