spark git commit: [SPARK-17751][SQL] Remove spark.sql.eagerAnalysis and Output the Plan if Existed in AnalysisException

hvanhovell Mon, 17 Oct 2016 11:33:30 -0700

Repository: spark
Updated Branches:
  refs/heads/master c7ac027d5 -> d88a1bae6



[SPARK-17751][SQL] Remove spark.sql.eagerAnalysis and Output the Plan if 
Existed in AnalysisException

### What changes were proposed in this pull request?
Dataset always does eager analysis now. Thus, `spark.sql.eagerAnalysis` is not 
used any more. Thus, we need to remove it.

This PR also outputs the plan. Without the fix, the analysis error is like
```
cannot resolve '`k1`' given input columns: [k, v]; line 1 pos 12
```

After the fix, the analysis error becomes:
```
org.apache.spark.sql.AnalysisException: cannot resolve '`k1`' given input 
columns: [k, v]; line 1 pos 12;
'Project [unresolvedalias(CASE WHEN ('k1 = 2) THEN 22 WHEN ('k1 = 4) THEN 44 
ELSE 0 END, None), v#6]
+- SubqueryAlias t
   +- Project [_1#2 AS k#5, _2#3 AS v#6]
      +- LocalRelation [_1#2, _2#3]
```

### How was this patch tested?
N/A

Author: gatorsmile <[email protected]>

Closes #15316 from gatorsmile/eagerAnalysis.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d88a1bae
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d88a1bae
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d88a1bae

Branch: refs/heads/master
Commit: d88a1bae6a9c975c39549ec2326d839ea93949b2
Parents: c7ac027
Author: gatorsmile <[email protected]>
Authored: Mon Oct 17 11:33:06 2016 -0700
Committer: Herman van Hovell <[email protected]>
Committed: Mon Oct 17 11:33:06 2016 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/AnalysisException.scala    |  7 +++++++
 .../org/apache/spark/sql/execution/debug/package.scala    |  9 ---------
 .../scala/org/apache/spark/sql/internal/SQLConf.scala     | 10 ----------
 .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala    |  3 +++
 4 files changed, 10 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d88a1bae/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
index f300330..7defb9d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
@@ -42,6 +42,13 @@ class AnalysisException protected[sql] (
   }
 
   override def getMessage: String = {
+    val planAnnotation = plan.map(p => s";\n$p").getOrElse("")
+    getSimpleMessage + planAnnotation
+  }
+
+  // Outputs an exception without the logical plan.
+  // For testing only
+  def getSimpleMessage: String = {
     val lineAnnotation = line.map(l => s" line $l").getOrElse("")
     val positionAnnotation = startPosition.map(p => s" pos $p").getOrElse("")
     s"$message;$lineAnnotation$positionAnnotation"

http://git-wip-us.apache.org/repos/asf/spark/blob/d88a1bae/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
index d321f4c..dd9d837 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
@@ -70,15 +70,6 @@ package object debug {
   }
 
   /**
-   * Augments [[SparkSession]] with debug methods.
-   */
-  implicit class DebugSQLContext(sparkSession: SparkSession) {
-    def debug(): Unit = {
-      sparkSession.conf.set(SQLConf.DATAFRAME_EAGER_ANALYSIS.key, false)
-    }
-  }
-
-  /**
    * Augments [[Dataset]]s with debug methods.
    */
   implicit class DebugQuery(query: Dataset[_]) extends Logging {

http://git-wip-us.apache.org/repos/asf/spark/blob/d88a1bae/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index a055e01..8afd39d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -388,14 +388,6 @@ object SQLConf {
       .intConf
       .createWithDefault(32)
 
-  // Whether to perform eager analysis when constructing a dataframe.
-  // Set to false when debugging requires the ability to look at invalid query 
plans.
-  val DATAFRAME_EAGER_ANALYSIS = SQLConfigBuilder("spark.sql.eagerAnalysis")
-    .internal()
-    .doc("When true, eagerly applies query analysis on DataFrame operations.")
-    .booleanConf
-    .createWithDefault(true)
-
   // Whether to automatically resolve ambiguity in join conditions for 
self-joins.
   // See SPARK-6231.
   val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY =
@@ -748,8 +740,6 @@ private[sql] class SQLConf extends Serializable with 
CatalystConf with Logging {
 
   def bucketingEnabled: Boolean = getConf(SQLConf.BUCKETING_ENABLED)
 
-  def dataFrameEagerAnalysis: Boolean = getConf(DATAFRAME_EAGER_ANALYSIS)
-
   def dataFrameSelfJoinAutoResolveAmbiguity: Boolean =
     getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/d88a1bae/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 55d5a56..02841d7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -220,6 +220,9 @@ class SQLQueryTestSuite extends QueryTest with 
SharedSQLContext {
       if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, 
answer.sorted)
 
     } catch {
+      case a: AnalysisException if a.plan.nonEmpty =>
+        // Do not output the logical plan tree which contains expression IDs.
+        (StructType(Seq.empty), Seq(a.getClass.getName, a.getSimpleMessage))
       case NonFatal(e) =>
         // If there is an exception, put the exception class followed by the 
message.
         (StructType(Seq.empty), Seq(e.getClass.getName, e.getMessage))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-17751][SQL] Remove spark.sql.eagerAnalysis and Output the Plan if Existed in AnalysisException

Reply via email to