This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2dc0527 [SPARK-38322][SQL] Support query stage show runtime
statistics in formatted explain mode
2dc0527 is described below
commit 2dc0527fb6462b6849d3c53c6d83392a8e37cdcc
Author: ulysses-you <[email protected]>
AuthorDate: Fri Feb 25 14:59:10 2022 +0800
[SPARK-38322][SQL] Support query stage show runtime statistics in formatted
explain mode
### What changes were proposed in this pull request?
Add query stage statistics information in formatted explain mode.
### Why are the changes needed?
The formatted explain mode is a powerful explain mode that shows the
details of a query plan. In AQE, a query stage knows its statistics once it
has materialized. Showing them makes it quick to check plan conversions,
e.g. join selection.
A simple example:
```sql
SELECT * FROM t JOIN t2 ON t.c = t2.c;
```
```sql
== Physical Plan ==
AdaptiveSparkPlan (21)
+- == Final Plan ==
* SortMergeJoin Inner (13)
:- * Sort (6)
: +- AQEShuffleRead (5)
: +- ShuffleQueryStage (4), Statistics(sizeInBytes=16.0 B,
rowCount=1)
: +- Exchange (3)
: +- * Filter (2)
: +- Scan hive default.t (1)
+- * Sort (12)
+- AQEShuffleRead (11)
+- ShuffleQueryStage (10), Statistics(sizeInBytes=16.0 B,
rowCount=1)
+- Exchange (9)
+- * Filter (8)
+- Scan hive default.t2 (7)
+- == Initial Plan ==
SortMergeJoin Inner (20)
:- Sort (16)
: +- Exchange (15)
: +- Filter (14)
: +- Scan hive default.t (1)
+- Sort (19)
+- Exchange (18)
+- Filter (17)
+- Scan hive default.t2 (7)
```
### Does this PR introduce _any_ user-facing change?
No, it only changes the output of explain in AQE.
### How was this patch tested?
Add test
Closes #35658 from ulysses-you/exchange-statistics.
Authored-by: ulysses-you <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/execution/adaptive/QueryStageExec.scala | 4 ++++
.../test/scala/org/apache/spark/sql/ExplainSuite.scala | 16 ++++++++++++++++
2 files changed, 20 insertions(+)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala
index e2f763e..ac1968d 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala
@@ -124,6 +124,10 @@ abstract class QueryStageExec extends LeafExecNode {
protected override def stringArgs: Iterator[Any] = Iterator.single(id)
+ override def simpleStringWithNodeId(): String = {
+ super.simpleStringWithNodeId() + computeStats().map(", " +
_.toString).getOrElse("")
+ }
+
override def generateTreeString(
depth: Int,
lastChildren: Seq[Boolean],
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index 67240c5..a5403ec 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -735,6 +735,22 @@ class ExplainSuiteAE extends ExplainSuiteHelper with
EnableAdaptiveExecutionSuit
}
}
}
+
+ test("SPARK-38322: Support query stage show runtime statistics in formatted
explain mode") {
+ val df = Seq(1, 2).toDF("c").distinct()
+ val statistics = "Statistics(sizeInBytes=32.0 B, rowCount=2)"
+
+ checkKeywordsNotExistsInExplain(
+ df,
+ FormattedMode,
+ statistics)
+
+ df.collect()
+ checkKeywordsExistsInExplain(
+ df,
+ FormattedMode,
+ statistics)
+ }
}
case class ExplainSingleData(id: Int)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]