On Thu, Oct 24, 2019 at 6:48 PM Andres Freund <and...@anarazel.de> wrote:
> Unfortunately I think the fix isn't all that trivial, due to the way we
> output the per-worker information at the end of ExplainNode(), by just
> dumping things into a string.  It seems to me that a step in the right
> direction would be for ExplainNode() to create
> planstate->worker_instrument StringInfos, which can be handed to
> routines like show_sort_info(), which would print the per-node
> information into that, rather than directly dumping into
> es->output. Most of the current "Show worker detail" would be done
> earlier in ExplainNode(), at the place where we current display the
> "actual rows" bit.
>
> ISTM that should include removing the duplication fo the the contents of
> show_sort_info(), and probably also for the Gather, GatherMerge blocks
> (I've apparently skipped adding the JIT information to the latter, not
> sure if we ought to fix that in the stable branches).
>
> Any chance you want to take a stab at that?

It took me a while, but I did take a stab at it (thanks for your
off-list help). Attached is my patch that changes the structured
formats to merge sort worker output in with costs/timing/buffers
worker output. I have not touched any other worker output yet, since
it's not under a Workers group as far as I can tell (so it does not
exhibit the problem I originally reported). I can try to make further
changes here if the approach is deemed sound. I also have not touched
text output; above you had proposed

>  Sort Method: external merge  Disk: 4920kB
>  Buffers: shared hit=682 read=10188, temp read=1415 written=2101
>  Worker 0: actual time=130.058..130.324 rows=1324 loops=1
>    Sort Method: external merge  Disk: 5880kB
>    Buffers: shared hit=337 read=3489, temp read=505 written=739
>  Worker 1: actual time=130.273..130.512 rows=1297 loops=1
>    Buffers: shared hit=345 read=3507, temp read=505 written=744
>    Sort Method: external merge  Disk: 5920kB

which makes sense to me, but I'd like to confirm this is the approach
we want before I add it to the patch.

This is my first C in close to a decade (and I was never much of a C
programmer to begin with), so please be gentle.

As Andres suggested off-list, I also changed the worker output to
order fields that also occur in the parent node in the same way as the
parent node.

I've also added a test for the patch, and because this is really an
EXPLAIN issue rather than a query feature issue, I added a
src/test/regress/sql/explain.sql for the test. I added a couple of
utility functions for munging json-formatted EXPLAIN plans into
something we can repeatably verify in regression tests (the functions
use json rather than jsonb to preserve field order). I have not added
this for YAML or XML (even though they should behave the same way),
since I'm not familiar with the the functions to manipulate those data
types in a similar way (if they exist). My hunch is due to the
similarity of structured formats, just testing JSON is enough, but I
can expand/adjust tests as necessary.
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 62fb3434a3..8f898fc20c 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -100,7 +100,7 @@ static void show_sortorder_options(StringInfo buf, Node *sortexpr,
 								   Oid sortOperator, Oid collation, bool nullsFirst);
 static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
 							 List *ancestors, ExplainState *es);
-static void show_sort_info(SortState *sortstate, ExplainState *es);
+static void show_sort_info(SortState *sortstate, StringInfo* worker_strs, ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_tidbitmap_info(BitmapHeapScanState *planstate,
 								ExplainState *es);
@@ -290,6 +290,7 @@ NewExplainState(void)
 	es->costs = true;
 	/* Prepare output buffer. */
 	es->str = makeStringInfo();
+	es->root_str = es->str;
 
 	return es;
 }
@@ -1073,9 +1074,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	const char *partialmode = NULL;
 	const char *operation = NULL;
 	const char *custom_name = NULL;
+	StringInfo *worker_strs = NULL;
 	int			save_indent = es->indent;
 	bool		haschildren;
 
+	/* Prepare per-worker output */
+	if (es->analyze && planstate->worker_instrument) {
+		int num_workers = planstate->worker_instrument->num_workers;
+		int n;
+		worker_strs = (StringInfo *) palloc0(num_workers * sizeof(StringInfo));
+		for (n = 0; n < num_workers; n++) {
+			worker_strs[n] = makeStringInfo();
+		}
+	}
+
 	switch (nodeTag(plan))
 	{
 		case T_Result:
@@ -1340,6 +1352,64 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		ExplainPropertyBool("Parallel Aware", plan->parallel_aware, es);
 	}
 
+	/* Prepare worker general execution details */
+	if (es->analyze && es->verbose && planstate->worker_instrument)
+	{
+		WorkerInstrumentation *w = planstate->worker_instrument;
+		int			n;
+
+		for (n = 0; n < w->num_workers; ++n)
+		{
+			Instrumentation *instrument = &w->instrument[n];
+			double		nloops = instrument->nloops;
+			double		startup_ms;
+			double		total_ms;
+			double		rows;
+
+			if (nloops <= 0)
+				continue;
+			startup_ms = 1000.0 * instrument->startup / nloops;
+			total_ms = 1000.0 * instrument->total / nloops;
+			rows = instrument->ntuples / nloops;
+
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				appendStringInfoSpaces(es->str, es->indent * 2);
+				appendStringInfo(es->str, "Worker %d: ", n);
+				if (es->timing)
+					appendStringInfo(es->str,
+									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
+									 startup_ms, total_ms, rows, nloops);
+				else
+					appendStringInfo(es->str,
+									 "actual rows=%.0f loops=%.0f\n",
+									 rows, nloops);
+				es->indent++;
+				if (es->buffers)
+					show_buffer_usage(es, &instrument->bufusage);
+				es->indent--;
+			}
+			else
+			{
+				ExplainOpenWorker(worker_strs[n], es);
+
+				if (es->timing)
+				{
+					ExplainPropertyFloat("Actual Startup Time", "ms",
+										 startup_ms, 3, es);
+					ExplainPropertyFloat("Actual Total Time", "ms",
+										 total_ms, 3, es);
+				}
+				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+			}
+		}
+
+		if (es->format != EXPLAIN_FORMAT_TEXT) {
+			ExplainCloseWorker(es);
+		}
+	}
+
 	switch (nodeTag(plan))
 	{
 		case T_SeqScan:
@@ -1839,7 +1909,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			break;
 		case T_Sort:
 			show_sort_keys(castNode(SortState, planstate), ancestors, es);
-			show_sort_info(castNode(SortState, planstate), es);
+			show_sort_info(castNode(SortState, planstate), worker_strs, es);
 			break;
 		case T_MergeAppend:
 			show_merge_append_keys(castNode(MergeAppendState, planstate),
@@ -1868,73 +1938,30 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	if (es->buffers && planstate->instrument)
 		show_buffer_usage(es, &planstate->instrument->bufusage);
 
-	/* Show worker detail */
-	if (es->analyze && es->verbose && planstate->worker_instrument)
+	/* Prepare worker buffer usage */
+	if (es->buffers && es->analyze && es->verbose && planstate->worker_instrument
+		&& es->format != EXPLAIN_FORMAT_TEXT)
 	{
 		WorkerInstrumentation *w = planstate->worker_instrument;
-		bool		opened_group = false;
 		int			n;
 
 		for (n = 0; n < w->num_workers; ++n)
 		{
 			Instrumentation *instrument = &w->instrument[n];
 			double		nloops = instrument->nloops;
-			double		startup_ms;
-			double		total_ms;
-			double		rows;
 
 			if (nloops <= 0)
 				continue;
-			startup_ms = 1000.0 * instrument->startup / nloops;
-			total_ms = 1000.0 * instrument->total / nloops;
-			rows = instrument->ntuples / nloops;
 
-			if (es->format == EXPLAIN_FORMAT_TEXT)
-			{
-				appendStringInfoSpaces(es->str, es->indent * 2);
-				appendStringInfo(es->str, "Worker %d: ", n);
-				if (es->timing)
-					appendStringInfo(es->str,
-									 "actual time=%.3f..%.3f rows=%.0f loops=%.0f\n",
-									 startup_ms, total_ms, rows, nloops);
-				else
-					appendStringInfo(es->str,
-									 "actual rows=%.0f loops=%.0f\n",
-									 rows, nloops);
-				es->indent++;
-				if (es->buffers)
-					show_buffer_usage(es, &instrument->bufusage);
-				es->indent--;
-			}
-			else
-			{
-				if (!opened_group)
-				{
-					ExplainOpenGroup("Workers", "Workers", false, es);
-					opened_group = true;
-				}
-				ExplainOpenGroup("Worker", NULL, true, es);
-				ExplainPropertyInteger("Worker Number", NULL, n, es);
-
-				if (es->timing)
-				{
-					ExplainPropertyFloat("Actual Startup Time", "ms",
-										 startup_ms, 3, es);
-					ExplainPropertyFloat("Actual Total Time", "ms",
-										 total_ms, 3, es);
-				}
-				ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-				ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
-
-				if (es->buffers)
-					show_buffer_usage(es, &instrument->bufusage);
-
-				ExplainCloseGroup("Worker", NULL, true, es);
-			}
+			ExplainOpenWorker(worker_strs[n], es);
+			show_buffer_usage(es, &instrument->bufusage);
 		}
+		ExplainCloseWorker(es);
+	}
 
-		if (opened_group)
-			ExplainCloseGroup("Workers", "Workers", false, es);
+	/* Show worker detail */
+	if (planstate->worker_instrument) {
+		ExplainFlushWorkers(worker_strs, planstate->worker_instrument->num_workers, es);
 	}
 
 	/* Get ready to display the child plans */
@@ -2534,7 +2561,7 @@ show_tablesample(TableSampleClause *tsc, PlanState *planstate,
  * If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
  */
 static void
-show_sort_info(SortState *sortstate, ExplainState *es)
+show_sort_info(SortState *sortstate, StringInfo *worker_strs, ExplainState *es)
 {
 	if (!es->analyze)
 		return;
@@ -2569,7 +2596,6 @@ show_sort_info(SortState *sortstate, ExplainState *es)
 	if (sortstate->shared_info != NULL)
 	{
 		int			n;
-		bool		opened_group = false;
 
 		for (n = 0; n < sortstate->shared_info->num_workers; n++)
 		{
@@ -2594,21 +2620,15 @@ show_sort_info(SortState *sortstate, ExplainState *es)
 			}
 			else
 			{
-				if (!opened_group)
-				{
-					ExplainOpenGroup("Workers", "Workers", false, es);
-					opened_group = true;
-				}
-				ExplainOpenGroup("Worker", NULL, true, es);
-				ExplainPropertyInteger("Worker Number", NULL, n, es);
+				ExplainOpenWorker(worker_strs[n], es);
 				ExplainPropertyText("Sort Method", sortMethod, es);
 				ExplainPropertyInteger("Sort Space Used", "kB", spaceUsed, es);
 				ExplainPropertyText("Sort Space Type", spaceType, es);
-				ExplainCloseGroup("Worker", NULL, true, es);
 			}
 		}
-		if (opened_group)
-			ExplainCloseGroup("Workers", "Workers", false, es);
+		if (es->format != EXPLAIN_FORMAT_TEXT) {
+			ExplainCloseWorker(es);
+		}
 	}
 }
 
@@ -3716,6 +3736,62 @@ ExplainCloseGroup(const char *objtype, const char *labelname,
 	}
 }
 
+/*
+ * Begin output for a specific worker. If we are currently producing output
+ * for another worker, close that worker automatically.
+ */
+void
+ExplainOpenWorker(StringInfo worker_str, ExplainState *es)
+{
+	/*
+	 * We indent twice--once for the "Worker" group and once for the "Workers"
+	 * group--but only need to adjust indentation once for each string of
+	 * ExplainOpenWorker calls.
+	 */
+	if (es->str == es->root_str) {
+		es->indent += 2;
+	}
+
+	es->print_workers = true;
+	es->str = worker_str;
+}
+
+/*
+ * Explicitly end output for the current worker and resume output
+ * for the root of the node.
+ */
+void
+ExplainCloseWorker(ExplainState *es)
+{
+	es->indent -= 2;
+	es->str = es->root_str;
+}
+
+/*
+ * Flush output registered so far to the output buffer and deallocate
+ * mechanism for writing to workers.
+ */
+void
+ExplainFlushWorkers(StringInfo* worker_strs, int num_workers, ExplainState *es)
+{
+	int i;
+
+	if (!es->print_workers) {
+		return;
+	}
+
+	ExplainOpenGroup("Workers", "Workers", false, es);
+	for (i = 0; i < num_workers; i++) {
+		ExplainOpenGroup("Worker", NULL, true, es);
+		ExplainPropertyInteger("Worker Number", NULL, i, es);
+		appendStringInfoString(es->str, worker_strs[i]->data);
+		ExplainCloseGroup("Worker", NULL, true, es);
+	}
+	ExplainCloseGroup("Workers", "Workers", false, es);
+	// do we have any other cleanup to do?
+	es->print_workers = false;
+}
+
 /*
  * Emit a "dummy" group that never has any members.
  *
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 8639891c16..e16f6872d8 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -27,7 +27,7 @@ typedef enum ExplainFormat
 
 typedef struct ExplainState
 {
-	StringInfo	str;			/* output buffer */
+	StringInfo	str;			/* current output buffer */
 	/* options */
 	bool		verbose;		/* be verbose */
 	bool		analyze;		/* print actual times */
@@ -40,6 +40,8 @@ typedef struct ExplainState
 	/* state for output formatting --- not reset for each new plan tree */
 	int			indent;			/* current indentation level */
 	List	   *grouping_stack; /* format-specific grouping state */
+	bool        print_workers;  /* whether current node has worker metadata */
+	StringInfo	root_str;		/* root output buffer */
 	/* state related to the current plan tree (filled by ExplainPrintPlan) */
 	PlannedStmt *pstmt;			/* top of plan */
 	List	   *rtable;			/* range table */
@@ -110,4 +112,9 @@ extern void ExplainOpenGroup(const char *objtype, const char *labelname,
 extern void ExplainCloseGroup(const char *objtype, const char *labelname,
 							  bool labeled, ExplainState *es);
 
+extern void ExplainOpenWorker(StringInfo worker_str, ExplainState *es);
+extern void ExplainCloseWorker(ExplainState *es);
+extern void ExplainFlushWorkers(StringInfo *worker_strs, int num_workers, ExplainState *es);
+
+
 #endif							/* EXPLAIN_H */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
new file mode 100644
index 0000000000..db9914db7b
--- /dev/null
+++ b/src/test/regress/expected/explain.out
@@ -0,0 +1,75 @@
+--
+-- EXPLAIN
+--
+-- helper functions for examining json explain output
+-- return a json explain of given query
+CREATE OR REPLACE FUNCTION json_explain(query text, variadic opts text[])
+  RETURNS json LANGUAGE PLPGSQL AS $$
+DECLARE
+  plan json;
+BEGIN
+  EXECUTE 'EXPLAIN (' || array_to_string(array_append(opts, 'FORMAT JSON) '), ', ') || query INTO STRICT plan;
+  RETURN plan;
+END;
+$$;
+/*
+ * Takes a json object and processes its keys and values. For every
+ * matching key whose value is a number, set it to zero to facilitate
+ * comparisons for values that can vary across executions. If not a number,
+ * set the value to -1 to indicate we matched an unexpected key. Return
+ * non-matching keys and values unmodified.
+ */
+CREATE OR REPLACE FUNCTION json_normalize_numeric_keys_matching(obj json, regexp text)
+  RETURNS json LANGUAGE SQL AS $$
+SELECT
+  coalesce(json_object_agg(
+    key,
+    CASE
+      WHEN key ~ regexp THEN
+        CASE json_typeof(value)
+	  WHEN 'number' THEN 0::text::json
+	  ELSE (-1)::text::json
+	END
+      ELSE value
+    END
+  ), '{}')
+FROM
+  json_each(obj)
+$$;
+-- explain only worker information, normalizing any values that could vary across executions
+CREATE OR REPLACE FUNCTION explain_workers(plan_node json)
+  RETURNS json LANGUAGE SQL AS $$
+SELECT
+  coalesce(json_object_agg(key, value), '{}')
+FROM (
+  SELECT
+    key,
+    CASE key
+      WHEN 'Workers' THEN
+        (SELECT json_agg(
+	   json_normalize_numeric_keys_matching(worker, '(Blocks|Time)$|Sort Space Used')
+        )
+        FROM json_array_elements(value) AS workers(worker))
+      WHEN 'Plans' THEN
+	(SELECT json_agg(
+	  explain_workers(child)
+	)
+	FROM json_array_elements(value) AS children(child))
+    END AS value
+  FROM
+    json_each(plan_node) AS entries(key, value)
+  WHERE
+    key IN ('Workers', 'Plans')
+) AS plan_fields(key, value);
+$$;
+-- test that per-worker sort and buffers output is combined correctly in EXPLAIN
+set force_parallel_mode = true;
+SELECT explain_workers(json_explain($$
+  SELECT * FROM (VALUES(1),(2)) x ORDER BY x;
+$$, 'verbose', 'analyze', 'buffers') -> 0 -> 'Plan');
+                                                                                                                                                                                                                                                                                                                                                                                                                                                              explain_workers                                                                                                                                                                                                                                                                                                                                                                                                                                                               
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ { "Plans" : [{ "Workers" : [{ "Worker Number" : 0, "Actual Startup Time" : 0, "Actual Total Time" : 0, "Actual Rows" : 2, "Actual Loops" : 1, "Sort Method" : "quicksort", "Sort Space Used" : 0, "Sort Space Type" : "Memory", "Shared Hit Blocks" : 0, "Shared Read Blocks" : 0, "Shared Dirtied Blocks" : 0, "Shared Written Blocks" : 0, "Local Hit Blocks" : 0, "Local Read Blocks" : 0, "Local Dirtied Blocks" : 0, "Local Written Blocks" : 0, "Temp Read Blocks" : 0, "Temp Written Blocks" : 0 }], "Plans" : [{ "Workers" : [{ "Worker Number" : 0, "Actual Startup Time" : 0, "Actual Total Time" : 0, "Actual Rows" : 2, "Actual Loops" : 1, "Shared Hit Blocks" : 0, "Shared Read Blocks" : 0, "Shared Dirtied Blocks" : 0, "Shared Written Blocks" : 0, "Local Hit Blocks" : 0, "Local Read Blocks" : 0, "Local Dirtied Blocks" : 0, "Local Written Blocks" : 0, "Temp Read Blocks" : 0, "Temp Written Blocks" : 0 }] }] }] }
+(1 row)
+
+reset force_parallel_mode;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index d33a4e143d..d2b17dd3ea 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -112,7 +112,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
 # ----------
 # Another group of parallel tests
 # ----------
-test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort
+test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain
 
 # event triggers cannot run concurrently with any test that runs DDL
 test: event_trigger
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index f86f5c5682..acba391332 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -193,6 +193,7 @@ test: indexing
 test: partition_aggregate
 test: partition_info
 test: tuplesort
+test: explain
 test: event_trigger
 test: fast_default
 test: stats
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
new file mode 100644
index 0000000000..5fb6e7d2e1
--- /dev/null
+++ b/src/test/regress/sql/explain.sql
@@ -0,0 +1,77 @@
+--
+-- EXPLAIN
+--
+
+-- helper functions for examining json explain output
+
+-- return a json explain of given query
+CREATE OR REPLACE FUNCTION json_explain(query text, variadic opts text[])
+  RETURNS json LANGUAGE PLPGSQL AS $$
+DECLARE
+  plan json;
+BEGIN
+  EXECUTE 'EXPLAIN (' || array_to_string(array_append(opts, 'FORMAT JSON) '), ', ') || query INTO STRICT plan;
+  RETURN plan;
+END;
+$$;
+
+/*
+ * Takes a json object and processes its keys and values. For every
+ * matching key whose value is a number, set it to zero to facilitate
+ * comparisons for values that can vary across executions. If not a number,
+ * set the value to -1 to indicate we matched an unexpected key. Return
+ * non-matching keys and values unmodified.
+ */
+CREATE OR REPLACE FUNCTION json_normalize_numeric_keys_matching(obj json, regexp text)
+  RETURNS json LANGUAGE SQL AS $$
+SELECT
+  coalesce(json_object_agg(
+    key,
+    CASE
+      WHEN key ~ regexp THEN
+        CASE json_typeof(value)
+	  WHEN 'number' THEN 0::text::json
+	  ELSE (-1)::text::json
+	END
+      ELSE value
+    END
+  ), '{}')
+FROM
+  json_each(obj)
+$$;
+
+-- explain only worker information, normalizing any values that could vary across executions
+CREATE OR REPLACE FUNCTION explain_workers(plan_node json)
+  RETURNS json LANGUAGE SQL AS $$
+SELECT
+  coalesce(json_object_agg(key, value), '{}')
+FROM (
+  SELECT
+    key,
+    CASE key
+      WHEN 'Workers' THEN
+        (SELECT json_agg(
+	   json_normalize_numeric_keys_matching(worker, '(Blocks|Time)$|Sort Space Used')
+        )
+        FROM json_array_elements(value) AS workers(worker))
+      WHEN 'Plans' THEN
+	(SELECT json_agg(
+	  explain_workers(child)
+	)
+	FROM json_array_elements(value) AS children(child))
+    END AS value
+  FROM
+    json_each(plan_node) AS entries(key, value)
+  WHERE
+    key IN ('Workers', 'Plans')
+) AS plan_fields(key, value);
+$$;
+
+-- test that per-worker sort and buffers output is combined correctly in EXPLAIN
+set force_parallel_mode = true;
+
+SELECT explain_workers(json_explain($$
+  SELECT * FROM (VALUES(1),(2)) x ORDER BY x;
+$$, 'verbose', 'analyze', 'buffers') -> 0 -> 'Plan');
+
+reset force_parallel_mode;

Reply via email to