On Fri, Jun 17, 2016 at 9:29 AM, Robert Haas <robertmh...@gmail.com> wrote:
> On Fri, Jun 17, 2016 at 9:04 AM, Amit Kapila <amit.kapil...@gmail.com> wrote:
>> Attached, please find updated patch based on above lines.  Due to addition
>> of projection path on top of partial paths, some small additional cost is
>> added for parallel paths. In one of the tests in select_parallel.sql the
>> plan was getting converted to serial plan from parallel plan due to this
>> cost, so I have changed it to make it more restrictive.  Before changing, I
>> have debugged the test to confirm that it was due to this new projection
>> path cost.  I have added two new tests as well to cover the new code added
>> by patch.
>
> I'm going to go work on this patch now.

Here is a revised version, which I plan to commit in a few hours
before the workday expires.  I kept it basically as Amit had it, but I
whacked the comments around some and, more substantively, avoided
inserting and/or charging for a Result node when that's not necessary.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index cefec7b..0434a5a 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -465,7 +465,8 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
 	 * cheapest path.)
 	 */
 	sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
-										   create_pathtarget(subroot, tlist));
+										   create_pathtarget(subroot, tlist),
+										   false);
 
 	/*
 	 * Determine cost to get just the first row of the presorted path.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 07b925e..0af8151 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1500,6 +1500,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		PathTarget *grouping_target;
 		PathTarget *scanjoin_target;
 		bool		have_grouping;
+		bool		scanjoin_target_parallel_safe = false;
 		WindowFuncLists *wflists = NULL;
 		List	   *activeWindows = NIL;
 		List	   *rollup_lists = NIL;
@@ -1730,7 +1731,16 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			scanjoin_target = grouping_target;
 
 		/*
-		 * Forcibly apply that target to all the Paths for the scan/join rel.
+		 * Check whether scan/join target is parallel safe ... unless there
+		 * are no partial paths, in which case we don't care.
+		 */
+		if (current_rel->partial_pathlist &&
+			!has_parallel_hazard((Node *) scanjoin_target->exprs, false))
+			scanjoin_target_parallel_safe = true;
+
+		/*
+		 * Forcibly apply scan/join target to all the Paths for the scan/join
+		 * rel.
 		 *
 		 * In principle we should re-run set_cheapest() here to identify the
 		 * cheapest path, but it seems unlikely that adding the same tlist
@@ -1746,7 +1756,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 
 			Assert(subpath->param_info == NULL);
 			path = apply_projection_to_path(root, current_rel,
-											subpath, scanjoin_target);
+											subpath, scanjoin_target,
+											scanjoin_target_parallel_safe);
 			/* If we had to add a Result, path is different from subpath */
 			if (path != subpath)
 			{
@@ -1759,6 +1770,70 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		}
 
 		/*
+		 * Upper planning steps which make use of the top scan/join rel's
+		 * partial pathlist will expect partial paths for that rel to produce
+		 * the same output as complete paths ... and we just changed the
+		 * output for the complete paths, so we'll need to do the same thing
+		 * for partial paths.
+		 */
+		if (scanjoin_target_parallel_safe)
+		{
+			/*
+			 * Apply the scan/join target to each partial path.  Otherwise,
+			 * anything that attempts to use the partial paths for further
+			 * upper planning may go wrong.
+			 */
+			foreach(lc, current_rel->partial_pathlist)
+			{
+				Path	   *subpath = (Path *) lfirst(lc);
+				Path	   *newpath;
+
+				/*
+				 * We can't use apply_projection_to_path() here, because there
+				 * could already be pointers to these paths, and therefore
+				 * we cannot modify them in place.  Instead, we must use
+				 * create_projection_path().  The good news is this won't
+				 * actually insert a Result node into the final plan unless
+				 * it's needed, but the bad news is that it will charge for
+				 * the node whether it's needed or not.  Therefore, if the
+				 * target list is already what we need it to be, just leave
+				 * this partial path alone.
+				 */
+				if (equal(scanjoin_target->exprs, subpath->pathtarget->exprs))
+					continue;
+
+				Assert(subpath->param_info == NULL);
+				newpath = (Path *) create_projection_path(root,
+															 current_rel,
+															 subpath,
+															 scanjoin_target);
+				if (is_projection_capable_path(subpath))
+				{
+					/*
+					 * Since the target lists differ, a projection path is
+					 * essential, but it will disappear at plan creation time
+					 * because the subpath is projection-capable.  So avoid
+					 * charging anything for the disappearing node.
+					 */
+					newpath->startup_cost = subpath->startup_cost;
+					newpath->total_cost = subpath->total_cost;
+				}
+
+				lfirst(lc) = newpath;
+			}
+		}
+		else
+		{
+			/*
+			 * In the unfortunate event that scanjoin_target is not
+			 * parallel-safe, we can't apply it to the partial paths; in
+			 * that case, we'll need to forget about the partial paths, which
+			 * aren't valid input for upper planning steps.
+			 */
+			current_rel->partial_pathlist = NIL;
+		}
+
+		/*
 		 * Save the various upper-rel PathTargets we just computed into
 		 * root->upper_targets[].  The core code doesn't use this, but it
 		 * provides a convenient place for extensions to get at the info.  For
@@ -4153,7 +4228,7 @@ create_ordered_paths(PlannerInfo *root,
 			/* Add projection step if needed */
 			if (path->pathtarget != target)
 				path = apply_projection_to_path(root, ordered_rel,
-												path, target);
+												path, target, false);
 
 			add_path(ordered_rel, path);
 		}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 552b756..30975e0 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -325,7 +325,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 									 refnames_tlist);
 
 		path = apply_projection_to_path(root, rel, path,
-										create_pathtarget(root, tlist));
+										create_pathtarget(root, tlist),
+										false);
 
 		/* Return the fully-fledged tlist to caller, too */
 		*pTargetList = tlist;
@@ -394,7 +395,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 											path->parent,
 											path,
 											create_pathtarget(root,
-															  *pTargetList));
+															  *pTargetList),
+											false);
 		}
 		return path;
 	}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 6b57de3..5b66ff5 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2217,12 +2217,14 @@ create_projection_path(PlannerInfo *root,
  * 'rel' is the parent relation associated with the result
  * 'path' is the path representing the source of data
  * 'target' is the PathTarget to be computed
+ * 'target_parallel' indicates that target expressions are all parallel-safe
  */
 Path *
 apply_projection_to_path(PlannerInfo *root,
 						 RelOptInfo *rel,
 						 Path *path,
-						 PathTarget *target)
+						 PathTarget *target,
+						 bool target_parallel)
 {
 	QualCost	oldcost;
 
@@ -2248,8 +2250,7 @@ apply_projection_to_path(PlannerInfo *root,
 	 * project. But if there is something that is not parallel-safe in the
 	 * target expressions, then we can't.
 	 */
-	if (IsA(path, GatherPath) &&
-		!has_parallel_hazard((Node *) target->exprs, false))
+	if (IsA(path, GatherPath) && target_parallel)
 	{
 		GatherPath *gpath = (GatherPath *) path;
 
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 5de4c34..586ecdd 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -143,7 +143,8 @@ extern ProjectionPath *create_projection_path(PlannerInfo *root,
 extern Path *apply_projection_to_path(PlannerInfo *root,
 						 RelOptInfo *rel,
 						 Path *path,
-						 PathTarget *target);
+						 PathTarget *target,
+						 bool target_parallel);
 extern SortPath *create_sort_path(PlannerInfo *root,
 				 RelOptInfo *rel,
 				 Path *subpath,
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 6f1f247..3c90640 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -51,6 +51,38 @@ select parallel_restricted(unique1) from tenk1
                Filter: (tenk1.stringu1 = 'GRAAAA'::name)
 (9 rows)
 
+-- test parallel plan when group by expression is in target list.
+explain (costs off)
+	select length(stringu1) from tenk1 group by length(stringu1);
+                    QUERY PLAN                     
+---------------------------------------------------
+ Finalize HashAggregate
+   Group Key: (length((stringu1)::text))
+   ->  Gather
+         Workers Planned: 4
+         ->  Partial HashAggregate
+               Group Key: length((stringu1)::text)
+               ->  Parallel Seq Scan on tenk1
+(7 rows)
+
+select length(stringu1) from tenk1 group by length(stringu1);
+ length 
+--------
+      6
+(1 row)
+
+-- test that parallel plan for aggregates is not selected when
+-- target list contains parallel restricted clause.
+explain (costs off)
+	select  sum(parallel_restricted(unique1)) from tenk1
+	group by(parallel_restricted(unique1));
+                     QUERY PLAN                     
+----------------------------------------------------
+ HashAggregate
+   Group Key: parallel_restricted(unique1)
+   ->  Index Only Scan using tenk1_unique1 on tenk1
+(3 rows)
+
 set force_parallel_mode=1;
 explain (costs off)
   select stringu1::int2 from tenk1 where unique1 = 1;
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
index 7b607c2..a8cd993 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -24,6 +24,17 @@ explain (verbose, costs off)
 select parallel_restricted(unique1) from tenk1
   where stringu1 = 'GRAAAA' order by 1;
 
+-- test parallel plan when group by expression is in target list.
+explain (costs off)
+	select length(stringu1) from tenk1 group by length(stringu1);
+select length(stringu1) from tenk1 group by length(stringu1);
+
+-- test that parallel plan for aggregates is not selected when
+-- target list contains parallel restricted clause.
+explain (costs off)
+	select  sum(parallel_restricted(unique1)) from tenk1
+	group by(parallel_restricted(unique1));
+
 set force_parallel_mode=1;
 
 explain (costs off)
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to