Repository: spark
Updated Branches:
refs/heads/master c22810004 -> 3cb3ccce1
[SPARK-21196] Split codegen info of query plan into sequence
codegen info of query plan can be very long.
In the debugging console or on a web page, it would be more readable if the
subtrees and their corresponding codegen were split into a sequence.
Example:
```scala
codegenStringSeq(sql("select 1").queryExecution.executedPlan)
```
The example will return a Seq[(String, String)] of length 1, containing the
subtree as a string and the corresponding generated code.
The subtree as string:
> *Project [1 AS 1#0]
> +- Scan OneRowRelation[]
The generated code:
```java
/* 001 */ public Object generate(Object[] references) {
/* 002 */ return new GeneratedIterator(references);
/* 003 */ }
/* 004 */
/* 005 */ final class GeneratedIterator extends
org.apache.spark.sql.execution.BufferedRowIterator {
/* 006 */ private Object[] references;
/* 007 */ private scala.collection.Iterator[] inputs;
/* 008 */ private scala.collection.Iterator inputadapter_input;
/* 009 */ private UnsafeRow project_result;
/* 010 */ private
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder project_holder;
/* 011 */ private
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter
project_rowWriter;
/* 012 */
/* 013 */ public GeneratedIterator(Object[] references) {
/* 014 */ this.references = references;
/* 015 */ }
/* 016 */
/* 017 */ public void init(int index, scala.collection.Iterator[] inputs) {
/* 018 */ partitionIndex = index;
/* 019 */ this.inputs = inputs;
/* 020 */ inputadapter_input = inputs[0];
/* 021 */ project_result = new UnsafeRow(1);
/* 022 */ project_holder = new
org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder(project_result,
0);
/* 023 */ project_rowWriter = new
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(project_holder,
1);
/* 024 */
/* 025 */ }
/* 026 */
/* 027 */ protected void processNext() throws java.io.IOException {
/* 028 */ while (inputadapter_input.hasNext() && !stopEarly()) {
/* 029 */ InternalRow inputadapter_row = (InternalRow)
inputadapter_input.next();
/* 030 */ project_rowWriter.write(0, 1);
/* 031 */ append(project_result);
/* 032 */ if (shouldStop()) return;
/* 033 */ }
/* 034 */ }
/* 035 */
/* 036 */ }
```
## What changes were proposed in this pull request?
Add a method codegenToSeq that splits the codegen info of a query plan into a sequence.
## How was this patch tested?
unit test
cloud-fan gatorsmile
Please review http://spark.apache.org/contributing.html before opening a pull
request.
Author: Wang Gengliang <[email protected]>
Closes #18409 from gengliangwang/codegen.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3cb3ccce
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3cb3ccce
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3cb3ccce
Branch: refs/heads/master
Commit: 3cb3ccce120fa9f0273133912624b877b42d95fd
Parents: c228100
Author: Wang Gengliang <[email protected]>
Authored: Tue Jun 27 17:24:46 2017 +0800
Committer: Wenchen Fan <[email protected]>
Committed: Tue Jun 27 17:24:46 2017 +0800
----------------------------------------------------------------------
.../spark/sql/execution/QueryExecution.scala | 9 +++++
.../spark/sql/execution/debug/package.scala | 35 +++++++++++++++-----
.../sql/execution/debug/DebuggingSuite.scala | 7 ++++
3 files changed, 43 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3cb3ccce/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index c7cac33..9533144 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -245,5 +245,14 @@ class QueryExecution(val sparkSession: SparkSession, val
logical: LogicalPlan) {
println(org.apache.spark.sql.execution.debug.codegenString(executedPlan))
// scalastyle:on println
}
+
+ /**
+ * Get WholeStageCodegenExec subtrees and the codegen in a query plan
+ *
+ * @return Sequence of WholeStageCodegen subtrees and corresponding codegen
+ */
+ def codegenToSeq(): Seq[(String, String)] = {
+ org.apache.spark.sql.execution.debug.codegenStringSeq(executedPlan)
+ }
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/3cb3ccce/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
index 0395c43..a717cbd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
@@ -50,7 +50,31 @@ package object debug {
// scalastyle:on println
}
+ /**
+ * Get WholeStageCodegenExec subtrees and the codegen in a query plan into
one String
+ *
+ * @param plan the query plan for codegen
+ * @return single String containing all WholeStageCodegen subtrees and
corresponding codegen
+ */
def codegenString(plan: SparkPlan): String = {
+ val codegenSeq = codegenStringSeq(plan)
+ var output = s"Found ${codegenSeq.size} WholeStageCodegen subtrees.\n"
+ for (((subtree, code), i) <- codegenSeq.zipWithIndex) {
+ output += s"== Subtree ${i + 1} / ${codegenSeq.size} ==\n"
+ output += subtree
+ output += "\nGenerated code:\n"
+ output += s"${code}\n"
+ }
+ output
+ }
+
+ /**
+ * Get WholeStageCodegenExec subtrees and the codegen in a query plan
+ *
+ * @param plan the query plan for codegen
+ * @return Sequence of WholeStageCodegen subtrees and corresponding codegen
+ */
+ def codegenStringSeq(plan: SparkPlan): Seq[(String, String)] = {
val codegenSubtrees = new
collection.mutable.HashSet[WholeStageCodegenExec]()
plan transform {
case s: WholeStageCodegenExec =>
@@ -58,15 +82,10 @@ package object debug {
s
case s => s
}
- var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n"
- for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
- output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
- output += s
- output += "\nGenerated code:\n"
- val (_, source) = s.doCodeGen()
- output += s"${CodeFormatter.format(source)}\n"
+ codegenSubtrees.toSeq.map { subtree =>
+ val (_, source) = subtree.doCodeGen()
+ (subtree.toString, CodeFormatter.format(source))
}
- output
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/3cb3ccce/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
index 4fc52c9..adcaf2d 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
@@ -38,4 +38,11 @@ class DebuggingSuite extends SparkFunSuite with
SharedSQLContext {
assert(res.contains("Subtree 2 / 2"))
assert(res.contains("Object[]"))
}
+
+ test("debugCodegenStringSeq") {
+ val res =
codegenStringSeq(spark.range(10).groupBy("id").count().queryExecution.executedPlan)
+ assert(res.length == 2)
+ assert(res.forall{ case (subtree, code) =>
+ subtree.contains("Range") && code.contains("Object[]")})
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]