spark git commit: [SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose

2015-02-10 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master ea6028409 -> 45df77b84


[SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose

Author: Cheng Hao hao.ch...@intel.com

Closes #4496 from chenghao-intel/df_explain and squashes the following commits:

552aa58 [Cheng Hao] Add explain support for DF


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/45df77b8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/45df77b8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/45df77b8

Branch: refs/heads/master
Commit: 45df77b8418873a00d770e435358bf603765595f
Parents: ea60284
Author: Cheng Hao hao.ch...@intel.com
Authored: Tue Feb 10 19:40:51 2015 -0800
Committer: Michael Armbrust mich...@databricks.com
Committed: Tue Feb 10 19:40:51 2015 -0800

--
 .../src/main/scala/org/apache/spark/sql/Column.scala   |  8 
 .../main/scala/org/apache/spark/sql/DataFrame.scala|  6 ++
 .../scala/org/apache/spark/sql/DataFrameImpl.scala | 13 ++---
 .../org/apache/spark/sql/execution/commands.scala  |  7 +--
 .../main/scala/org/apache/spark/sql/hive/HiveQl.scala  |  8 +++-
 5 files changed, 32 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/45df77b8/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 1011bf0..b0e9590 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -600,6 +600,14 @@ trait Column extends DataFrame {
   def desc: Column = exprToColumn(SortOrder(expr, Descending), computable = 
false)
 
   def asc: Column = exprToColumn(SortOrder(expr, Ascending), computable = 
false)
+
+  override def explain(extended: Boolean): Unit = {
+if (extended) {
+  println(expr)
+} else {
+  println(expr.prettyString)
+}
+  }
 }
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/45df77b8/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index ca8d552..17900c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -124,6 +124,12 @@ trait DataFrame extends RDDApi[Row] {
   /** Prints the schema to the console in a nice tree format. */
   def printSchema(): Unit
 
+  /** Prints the plans (logical and physical) to the console for debugging 
purpose. */
+  def explain(extended: Boolean): Unit
+
+  /** Only prints the physical plan to the console for debugging purpose. */
+  def explain(): Unit = explain(false)
+
   /**
* Returns true if the `collect` and `take` methods can be run locally
* (without any Spark executors).

http://git-wip-us.apache.org/repos/asf/spark/blob/45df77b8/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
index 0134b03..9638ce0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
@@ -30,12 +30,11 @@ import org.apache.spark.api.python.SerDeUtil
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.sql.catalyst.{SqlParser, ScalaReflection}
-import org.apache.spark.sql.catalyst.analysis.{EliminateAnalysisOperators, 
ResolvedStar, UnresolvedRelation}
+import org.apache.spark.sql.catalyst.analysis.{ResolvedStar, 
UnresolvedRelation}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.{JoinType, Inner}
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util.sideBySide
-import org.apache.spark.sql.execution.{LogicalRDD, EvaluatePython}
+import org.apache.spark.sql.execution.{ExplainCommand, LogicalRDD, 
EvaluatePython}
 import org.apache.spark.sql.json.JsonRDD
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.{NumericType, StructType}
@@ -115,6 +114,14 @@ private[sql] class DataFrameImpl protected[sql](
 
   override def printSchema(): Unit = println(schema.treeString)
 
+  override def explain(extended: Boolean): Unit = {
+ExplainCommand(
+  logicalPlan,
+  extended = 

spark git commit: [SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose

2015-02-10 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 1056c5b1f -> 7fa0d5f5c


[SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose

Author: Cheng Hao hao.ch...@intel.com

Closes #4496 from chenghao-intel/df_explain and squashes the following commits:

552aa58 [Cheng Hao] Add explain support for DF

(cherry picked from commit 45df77b8418873a00d770e435358bf603765595f)
Signed-off-by: Michael Armbrust mich...@databricks.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7fa0d5f5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7fa0d5f5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7fa0d5f5

Branch: refs/heads/branch-1.3
Commit: 7fa0d5f5c8f8f712d5ed787b5731d4ac57eea7a7
Parents: 1056c5b
Author: Cheng Hao hao.ch...@intel.com
Authored: Tue Feb 10 19:40:51 2015 -0800
Committer: Michael Armbrust mich...@databricks.com
Committed: Tue Feb 10 19:41:01 2015 -0800

--
 .../src/main/scala/org/apache/spark/sql/Column.scala   |  8 
 .../main/scala/org/apache/spark/sql/DataFrame.scala|  6 ++
 .../scala/org/apache/spark/sql/DataFrameImpl.scala | 13 ++---
 .../org/apache/spark/sql/execution/commands.scala  |  7 +--
 .../main/scala/org/apache/spark/sql/hive/HiveQl.scala  |  8 +++-
 5 files changed, 32 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7fa0d5f5/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 1011bf0..b0e9590 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -600,6 +600,14 @@ trait Column extends DataFrame {
   def desc: Column = exprToColumn(SortOrder(expr, Descending), computable = 
false)
 
   def asc: Column = exprToColumn(SortOrder(expr, Ascending), computable = 
false)
+
+  override def explain(extended: Boolean): Unit = {
+if (extended) {
+  println(expr)
+} else {
+  println(expr.prettyString)
+}
+  }
 }
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/7fa0d5f5/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index ca8d552..17900c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -124,6 +124,12 @@ trait DataFrame extends RDDApi[Row] {
   /** Prints the schema to the console in a nice tree format. */
   def printSchema(): Unit
 
+  /** Prints the plans (logical and physical) to the console for debugging 
purpose. */
+  def explain(extended: Boolean): Unit
+
+  /** Only prints the physical plan to the console for debugging purpose. */
+  def explain(): Unit = explain(false)
+
   /**
* Returns true if the `collect` and `take` methods can be run locally
* (without any Spark executors).

http://git-wip-us.apache.org/repos/asf/spark/blob/7fa0d5f5/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
index 0134b03..9638ce0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala
@@ -30,12 +30,11 @@ import org.apache.spark.api.python.SerDeUtil
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.sql.catalyst.{SqlParser, ScalaReflection}
-import org.apache.spark.sql.catalyst.analysis.{EliminateAnalysisOperators, 
ResolvedStar, UnresolvedRelation}
+import org.apache.spark.sql.catalyst.analysis.{ResolvedStar, 
UnresolvedRelation}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.{JoinType, Inner}
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util.sideBySide
-import org.apache.spark.sql.execution.{LogicalRDD, EvaluatePython}
+import org.apache.spark.sql.execution.{ExplainCommand, LogicalRDD, 
EvaluatePython}
 import org.apache.spark.sql.json.JsonRDD
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.{NumericType, StructType}
@@ -115,6 +114,14 @@ private[sql] class DataFrameImpl protected[sql](
 
   override def printSchema(): Unit = println(schema.treeString)
 
+