Re: [PATCH] Add extra statistics to explain for Nested Loop

2021-04-14 Thread e . sokolova
Thank you for working on this issue. Your comments helped me make this 
patch more correct.


Lines with "colon" format shouldn't use equal signs, and should use two 
spaces

between fields.
Done. Now extra line looks like "Loop min_rows: %.0f  max_rows: %.0f  
total_rows: %.0f" or "Loop min_time: %.3f  max_time: %.3f  min_rows: 
%.0f  max_rows: %.0f  total_rows: %.0f".


Since this is now on a separate line, the "if (nloops > 1 && 
es->verbose)"

can be after the existing "if (es->timing)", and shouldn't need its own
"if (es->timing)".  It should conditionally add a separate line, rather 
than

duplicating the "(actual.*" line.


-   if (es->timing)
+   if (nloops > 1 && es->verbose)
New version of patch contains this correction. It helped make the patch 
shorter.


In non-text mode, think you should not check "nloops > 1".  Rather, 
print the

field as 0.
The fields will not be zeros. New line will almost repeat the line with 
main sttistics.


I think the labels in non-text format should say "Loop Min Time" or 
similar.


And these variables should have a loop_ prefix like loop_min_t ?

There are good ideas. I changed it.

I apply new version of this patch. I hope it got better.
Please don't hesitate to share any thoughts on this topic.

--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres CompanyFrom: "Ekaterina Sokolova" 
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index b62a76e7e5a..bf8c37baefd 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1615,6 +1615,11 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		loop_total_rows = planstate->instrument->ntuples;
+		double		loop_min_r = planstate->instrument->min_tuples;
+		double		loop_max_r = planstate->instrument->max_tuples;
+		double		loop_min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		loop_max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
@@ -1626,6 +1631,19 @@ ExplainNode(PlanState *planstate, List *ancestors,
 appendStringInfo(es->str,
  " (actual rows=%.0f loops=%.0f)",
  rows, nloops);
+			if (nloops > 1 && es->verbose)
+			{
+appendStringInfo(es->str, "\n");
+ExplainIndentText(es);
+if (es->timing)
+	appendStringInfo(es->str,
+	 "Loop min_time: %.3f  max_time: %.3f  min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+	 loop_min_t_ms, loop_max_t_ms, loop_min_r, loop_max_r, loop_total_rows);
+else
+	appendStringInfo(es->str,
+	 "Loop min_rows: %.0f  max_rows: %.0f  total_rows: %.0f",
+	 loop_min_r, loop_max_r, loop_total_rows);
+			}
 		}
 		else
 		{
@@ -1635,8 +1653,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	 3, es);
 ExplainPropertyFloat("Actual Total Time", "s", total_ms,
 	 3, es);
+if (nloops > 1 && es->verbose)
+{
+	ExplainPropertyFloat("Loop Min Time", "s", loop_min_t_ms,
+		 3, es);
+	ExplainPropertyFloat("Loop Max Time", "s", loop_max_t_ms,
+		 3, es);
+}
 			}
 			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+			if (nloops > 1 && es->verbose)
+			{
+ExplainPropertyFloat("Loop Min Rows", NULL, loop_min_r, 0, es);
+ExplainPropertyFloat("Loop Max Rows", NULL, loop_max_r, 0, es);
+ExplainPropertyFloat("Loop Total Rows", NULL, loop_total_rows, 0, es);
+			}
 			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
@@ -1646,6 +1677,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			appendStringInfoString(es->str, " (never executed)");
 		else
 		{
+			/* without min and max values because actual result is 0 */
 			if (es->timing)
 			{
 ExplainPropertyFloat("Actual Startup Time", "ms", 0.0, 3, es);
@@ -1672,12 +1704,22 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			double		startup_ms;
 			double		total_ms;
 			double		rows;
+			double		loop_min_t_ms;
+			double		loop_max_t_ms;
+			double		loop_min_r;
+			double		loop_max_r;
+			double		loop_total_rows;
 
 			if (nloops <= 0)
 continue;
 			startup_ms = 1000.0 * instrument->startup / nloops;
 			total_ms = 1000.0 * instrument->total / nloops;
 			rows = instrument->ntuples / nloops;
+			loop_min_t_ms = 1000.0 * instrument->min_t;
+			loop_max_t_ms = 1000.0 * instrument->max_t;
+			loop_min_r = instrument->min_tuples;
+			loop_max_r = instrument->max_tuples;
+			loop_total_rows = instrument->ntuples;
 
 			ExplainOpenWorker(n, es);
 
@@ -1692,6 +1734,19 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	appendStringInfo(es->str,
 	 "actual rows=%.0f loops=%.0f\n",
 	 rows, nloops);
+if (nloops > 1)
+{
+  

Re: [PATCH] Add extra statistics to explain for Nested Loop

2021-03-25 Thread e . sokolova

Thank you all for your feedback and reforms.
I attach a new version of the patch with the some changes and fixes. 
Here's a list of the major changes:
1) New format of extra statistics. This is now contained in a line 
separate from the main statistics.


Julien Rouhaud писал 2021-02-01 08:28:
On Thu, Jan 28, 2021 at 8:38 PM Yugo NAGATA  
wrote:


postgres=# explain (analyze, verbose) select * from a,b where a.i=b.j;
   
 QUERY PLAN

---
 Nested Loop  (cost=0.00..2752.00 rows=991 width=8) (actual 
time=0.021..17.651 rows=991 loops=1)

   Output: a.i, b.j
   Join Filter: (a.i = b.j)
   Rows Removed by Join Filter: 99009
   ->  Seq Scan on public.b  (cost=0.00..2.00 rows=100 width=4) 
(actual time=0.009..0.023 rows=100 loops=1)

 Output: b.j
   ->  Seq Scan on public.a  (cost=0.00..15.00 rows=1000 width=4) 
(actual time=0.005..0.091 min_time=0.065 max_time=0.163 min_rows=1000 
rows=1000 max_rows=1000 loops=100)

 Output: a.i
 Planning Time: 0.066 ms
 Execution Time: 17.719 ms
(10 rows)

I don't like this format where the extra statistics appear in the same
line of existing information because the output format differs 
depended
on whether the plan node's loops > 1 or not. This makes the length of 
a
line too long. Also, other information reported by VERBOSE doesn't 
change

the exiting row format and just add extra rows for new information.

Instead, it seems good for me to add extra rows for the new statistics
without changint the existing row format as other VERBOSE information,
like below.

   ->  Seq Scan on public.a  (cost=0.00..15.00 rows=1000 width=4) 
(actual time=0.005..0.091 rows=1000  loops=100)

 Output: a.i
 Loops: min_time=0.065 max_time=0.163 min_rows=1000 
max_rows=1000


and so  on. What do you think about it?




2) Correction of the case of parallel scan


In parallel scan, the extra statistics are not reported correctly.

This reports max/min rows or time of inner scan as 0 in parallel 
workers,
and as a result only the leader process's ones are accounted. To fix 
this,

we would change InstrAggNode as below.





3) Adding extra statistics about total number of rows (total rows). 
There were many wishes for this here.


Please don't hesitate to share any thoughts on this topic.

--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index afc45429ba4..723eccca013 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1589,29 +1589,82 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		total_rows = planstate->instrument->ntuples;
+		double		min_r = planstate->instrument->min_tuples;
+		double		max_r = planstate->instrument->max_tuples;
+		double		min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			if (es->timing)
-appendStringInfo(es->str,
- " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
- startup_ms, total_ms, rows, nloops);
+			if (nloops > 1 && es->verbose)
+			{
+if (es->timing)
+{
+	appendStringInfo(es->str,
+	 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)\n",
+	 startup_ms, total_ms, rows, nloops);
+	ExplainIndentText(es);
+	appendStringInfo(es->str,
+	 "Loop: min_time=%.3f max_time=%.3f min_rows=%.0f max_rows=%.0f total_rows=%.0f",
+	 min_t_ms, max_t_ms, min_r, max_r, total_rows);
+}
+else
+{
+	appendStringInfo(es->str,
+	 " (actual rows=%.0f loops=%.0f)\n",
+	 rows, nloops);
+	ExplainIndentText(es);
+	appendStringInfo(es->str,
+	 "Loop: min_rows=%.0f max_rows=%.0f total_rows=%.0f",
+	 min_r, max_r, total_rows);
+}
+			}
 			else
-appendStringInfo(es->str,
- " (actual rows=%.0f loops=%.0f)",
- rows, nloops);
+			{
+if (es->timing)
+	appendStringInfo(es->str,
+	 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+	 startup_ms, total_ms, rows, nloops);
+else
+	appendStringInfo(es->str,
+	 " (actual rows=%.0f loops=%.0f)",
+	 rows, nloops);
+			}
 		}
 		else
 		{
-			if (es->timing)
+			if (nloops > 1 && es->verbose)
+			{
+if (es->timing)
+{
+	ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+		 3, es);
+	ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+		 3, es);
+	

Re: [PATCH] Add extra statistics to explain for Nested Loop

2020-10-23 Thread e . sokolova

 wrote:

You should update the explain_parallel_append() plpgsql function
created in that test file to make sure that both "rows" and the two
new counters are changed to "N".  There might be other similar changes
needed.


Thank you for watching this issue. I made the necessary changes in tests 
following your advice.



Now, for the changes themselves.  For the min/max time, you're
aggregating "totaltime - instr->firsttuple".  Why removing the startup
time from the loop execution time?  I think this should be kept.


I think it's right remark. I fixed it.


Also, in explain.c you display the counters in the "Nested loop" node,
but this should be done in the inner plan node instead, as this is the
one being looped on.  So the condition should probably be "nloops > 1"
rather than testing if it's a NestLoop.


Condition "nloops > 1" is not the same as checking if it's NestLoop. 
This condition will lead to printing extra statistics for nodes with 
different types of join, not only Nested Loop Join. If this statistic is 
useful for other plan nodes, it makes sense to change the condition.


 wrote:

I'm a bit worried that further increasing the size of struct
Instrumentation will increase the overhead of EXPLAIN ANALYZE further -
in some workloads we spend a fair bit of time in code handling that. It
would be good to try to find a few bad cases, and see what the overhead 
is.


Thank you for this comment, I will try to figure it out. Do you have 
some examples of large overhead dependent on this struct? I think I need 
some sample to know which way to think.


Thank you all for the feedback. I hope the new version of my patch will 
be more correct and useful.

Please don't hesitate to share any thoughts on this topic!
--
Ekaterina Sokolova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres CompanyFrom: "Ekaterina Sokolova" 
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 41317f18374..2132d82fe79 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1568,29 +1568,64 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		double		startup_ms = 1000.0 * planstate->instrument->startup / nloops;
 		double		total_ms = 1000.0 * planstate->instrument->total / nloops;
 		double		rows = planstate->instrument->ntuples / nloops;
+		double		min_r = planstate->instrument->min_tuples;
+		double		max_r = planstate->instrument->max_tuples;
+		double		min_t_ms = 1000.0 * planstate->instrument->min_t;
+		double		max_t_ms = 1000.0 * planstate->instrument->max_t;
 
 		if (es->format == EXPLAIN_FORMAT_TEXT)
 		{
-			if (es->timing)
-appendStringInfo(es->str,
- " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
- startup_ms, total_ms, rows, nloops);
+			if (nodeTag(plan) == T_NestLoop) {
+if (es->timing)
+	appendStringInfo(es->str,
+	 " (actual time=%.3f..%.3f min_time=%.3f max_time=%.3f min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+	 startup_ms, total_ms, min_t_ms, max_t_ms, min_r, rows, max_r, nloops);
+else
+	appendStringInfo(es->str,
+	 " (actual min_rows=%.0f rows=%.0f max_rows=%.0f loops=%.0f)",
+	 min_r, rows, max_r, nloops);
+			}
 			else
-appendStringInfo(es->str,
- " (actual rows=%.0f loops=%.0f)",
- rows, nloops);
+			{
+if (es->timing)
+	appendStringInfo(es->str,
+	 " (actual time=%.3f..%.3f rows=%.0f loops=%.0f)",
+	 startup_ms, total_ms, rows, nloops);
+else
+	appendStringInfo(es->str,
+	 " (actual rows=%.0f loops=%.0f)",
+	 rows, nloops);
+			}
 		}
 		else
 		{
-			if (es->timing)
+			if (nodeTag(plan) == T_NestLoop) {
+if (es->timing) {
+	ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+		 3, es);
+	ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+		 3, es);
+	ExplainPropertyFloat("Min Time", "s", min_t_ms,
+		 3, es);
+	ExplainPropertyFloat("Max Time", "s", max_t_ms,
+		 3, es);
+}
+ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+ExplainPropertyFloat("Min Rows", NULL, rows, 0, es);
+ExplainPropertyFloat("Max Rows", NULL, rows, 0, es);
+ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
+			}
+			else
 			{
-ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
-	 3, es);
-ExplainPropertyFloat("Actual Total Time", "s", total_ms,
-	 3, es);
+if (es->timing) {
+	ExplainPropertyFloat("Actual Startup Time", "s", startup_ms,
+		 3, es);
+	ExplainPropertyFloat("Actual Total Time", "s", total_ms,
+		 3, es);
+}
+ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
+ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 			}
-			ExplainPropertyFloat("Actual Rows", NULL, rows, 0, es);
-			ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es);
 		}
 	}
 	else if (es->analyze)
@@ -1599,6 +1634,7 @@