spark git commit: [SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose
Repository: spark
Updated Branches:
  refs/heads/master ea6028409 -> 45df77b84

[SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose

Author: Cheng Hao hao.ch...@intel.com

Closes #4496 from chenghao-intel/df_explain and squashes the following commits:

552aa58 [Cheng Hao] Add explain support for DF

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/45df77b8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/45df77b8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/45df77b8

Branch: refs/heads/master
Commit: 45df77b8418873a00d770e435358bf603765595f
Parents: ea60284
Author: Cheng Hao hao.ch...@intel.com
Authored: Tue Feb 10 19:40:51 2015 -0800
Committer: Michael Armbrust mich...@databricks.com
Committed: Tue Feb 10 19:40:51 2015 -0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/Column.scala  |  8
 .../main/scala/org/apache/spark/sql/DataFrame.scala   |  6 ++
 .../scala/org/apache/spark/sql/DataFrameImpl.scala    | 13 ++---
 .../org/apache/spark/sql/execution/commands.scala     |  7 +--
 .../main/scala/org/apache/spark/sql/hive/HiveQl.scala |  8 +++-
 5 files changed, 32 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/45df77b8/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 1011bf0..b0e9590 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -600,6 +600,14 @@ trait Column extends DataFrame {
 
   def desc: Column = exprToColumn(SortOrder(expr, Descending), computable = false)
 
   def asc: Column = exprToColumn(SortOrder(expr, Ascending), computable = false)
+
+  override def explain(extended: Boolean): Unit = {
+    if (extended) {
+      println(expr)
+    } else {
+      println(expr.prettyString)
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/45df77b8/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index ca8d552..17900c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -124,6 +124,12 @@ trait DataFrame extends RDDApi[Row] {
   /** Prints the schema to the console in a nice tree format. */
   def printSchema(): Unit
 
+  /** Prints the plans (logical and physical) to the console for debugging purpose. */
+  def explain(extended: Boolean): Unit
+
+  /** Only prints the physical plan to the console for debugging purpose. */
+  def explain(): Unit = explain(false)
+
   /**
    * Returns true if the `collect` and `take` methods can be run locally
    * (without any Spark executors).
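For reference, a minimal usage sketch of the new DataFrame API. This assumed spark-shell session is illustrative only: the 1.3-era sqlContext, its jsonFile method, and the bundled people.json example file are assumptions, and the printed plans are not reproduced here.

    // Assumed setup: spark-shell providing a 1.3-era sqlContext.
    val df = sqlContext.jsonFile("examples/src/main/resources/people.json")

    // The new zero-argument overload delegates to explain(false) and
    // prints only the physical plan.
    df.filter(df("age") > 21).explain()

    // explain(true) additionally prints the logical plans for the query.
    df.filter(df("age") > 21).explain(true)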
http://git-wip-us.apache.org/repos/asf/spark/blob/45df77b8/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
index 0134b03..9638ce0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
@@ -30,12 +30,11 @@ import org.apache.spark.api.python.SerDeUtil
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.sql.catalyst.{SqlParser, ScalaReflection}
-import org.apache.spark.sql.catalyst.analysis.{EliminateAnalysisOperators, ResolvedStar, UnresolvedRelation}
+import org.apache.spark.sql.catalyst.analysis.{ResolvedStar, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.{JoinType, Inner}
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util.sideBySide
-import org.apache.spark.sql.execution.{LogicalRDD, EvaluatePython}
+import org.apache.spark.sql.execution.{ExplainCommand, LogicalRDD, EvaluatePython}
 import org.apache.spark.sql.json.JsonRDD
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.{NumericType, StructType}
@@ -115,6 +114,14 @@ private[sql] class DataFrameImpl protected[sql](
 
   override def printSchema(): Unit = println(schema.treeString)
 
+  override def explain(extended: Boolean): Unit = {
+    ExplainCommand(
+      logicalPlan,
+      extended =
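Because Column extends DataFrame in this snapshot, the override added to Column.scala makes an individual expression explainable as well. A hedged sketch of the expected behaviour, assuming the df value from the earlier example; the exact rendering comes from Catalyst's toString and prettyString, so output will vary:

    // A derived column carrying a Catalyst expression.
    val c = df("age") + 1

    // extended = true prints the raw expression tree (println(expr)).
    c.explain(true)

    // The zero-argument form prints the compact rendering (expr.prettyString).
    c.explain()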
spark git commit: [SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 1056c5b1f -> 7fa0d5f5c

[SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose

Author: Cheng Hao hao.ch...@intel.com

Closes #4496 from chenghao-intel/df_explain and squashes the following commits:

552aa58 [Cheng Hao] Add explain support for DF

(cherry picked from commit 45df77b8418873a00d770e435358bf603765595f)
Signed-off-by: Michael Armbrust mich...@databricks.com

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7fa0d5f5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7fa0d5f5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7fa0d5f5

Branch: refs/heads/branch-1.3
Commit: 7fa0d5f5c8f8f712d5ed787b5731d4ac57eea7a7
Parents: 1056c5b
Author: Cheng Hao hao.ch...@intel.com
Authored: Tue Feb 10 19:40:51 2015 -0800
Committer: Michael Armbrust mich...@databricks.com
Committed: Tue Feb 10 19:41:01 2015 -0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/Column.scala  |  8
 .../main/scala/org/apache/spark/sql/DataFrame.scala   |  6 ++
 .../scala/org/apache/spark/sql/DataFrameImpl.scala    | 13 ++---
 .../org/apache/spark/sql/execution/commands.scala     |  7 +--
 .../main/scala/org/apache/spark/sql/hive/HiveQl.scala |  8 +++-
 5 files changed, 32 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7fa0d5f5/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 1011bf0..b0e9590 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -600,6 +600,14 @@ trait Column extends DataFrame {
 
   def desc: Column = exprToColumn(SortOrder(expr, Descending), computable = false)
 
   def asc: Column = exprToColumn(SortOrder(expr, Ascending), computable = false)
+
+  override def explain(extended: Boolean): Unit = {
+    if (extended) {
+      println(expr)
+    } else {
+      println(expr.prettyString)
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/7fa0d5f5/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index ca8d552..17900c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -124,6 +124,12 @@ trait DataFrame extends RDDApi[Row] {
   /** Prints the schema to the console in a nice tree format. */
   def printSchema(): Unit
 
+  /** Prints the plans (logical and physical) to the console for debugging purpose. */
+  def explain(extended: Boolean): Unit
+
+  /** Only prints the physical plan to the console for debugging purpose. */
+  def explain(): Unit = explain(false)
+
   /**
    * Returns true if the `collect` and `take` methods can be run locally
    * (without any Spark executors).
http://git-wip-us.apache.org/repos/asf/spark/blob/7fa0d5f5/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
index 0134b03..9638ce0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
@@ -30,12 +30,11 @@ import org.apache.spark.api.python.SerDeUtil
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.sql.catalyst.{SqlParser, ScalaReflection}
-import org.apache.spark.sql.catalyst.analysis.{EliminateAnalysisOperators, ResolvedStar, UnresolvedRelation}
+import org.apache.spark.sql.catalyst.analysis.{ResolvedStar, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.{JoinType, Inner}
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util.sideBySide
-import org.apache.spark.sql.execution.{LogicalRDD, EvaluatePython}
+import org.apache.spark.sql.execution.{ExplainCommand, LogicalRDD, EvaluatePython}
 import org.apache.spark.sql.json.JsonRDD
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.{NumericType, StructType}
@@ -115,6 +114,14 @@ private[sql] class DataFrameImpl protected[sql](
 
   override def printSchema(): Unit = println(schema.treeString)
 
+