spark git commit: [SPARK-17167][2.0][SQL] Issue Exceptions when Analyze Table on In-Memory Cataloged Tables

hvanhovell Thu, 25 Aug 2016 05:39:04 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 48ecf3d00 -> 2b32a442d



[SPARK-17167][2.0][SQL] Issue Exceptions when Analyze Table on In-Memory 
Cataloged Tables

### What changes were proposed in this pull request?
Currently, `Analyze Table` is only supported for Hive-serde tables. We should 
issue exceptions in all other cases. When the table is a data source table, we 
already issue an exception. However, when the table is an in-memory cataloged 
table, we do not issue any exception.

This PR issues an exception when the table is an in-memory cataloged table. For 
example,
```SQL
CREATE TABLE tbl(a INT, b INT) USING parquet
```
`tbl` is a `SimpleCatalogRelation` when Hive support is not enabled, so running 
`ANALYZE TABLE tbl COMPUTE STATISTICS` on it now throws an `AnalysisException`.

### How was this patch tested?
Added two test cases. One of them only improves the test coverage for the case 
where the analyzed table is a data source table.

Author: gatorsmile <[email protected]>

Closes #14781 from gatorsmile/analyzeInMemoryTable2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b32a442
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b32a442
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b32a442

Branch: refs/heads/branch-2.0
Commit: 2b32a442dfbc8494c30dcb2f6869c9dc7f258ada
Parents: 48ecf3d
Author: gatorsmile <[email protected]>
Authored: Thu Aug 25 14:38:41 2016 +0200
Committer: Herman van Hovell <[email protected]>
Committed: Thu Aug 25 14:38:41 2016 +0200

----------------------------------------------------------------------
 .../sql/execution/command/AnalyzeTableCommand.scala  |  4 ++--
 .../spark/sql/execution/command/DDLSuite.scala       | 11 +++++++++++
 .../spark/sql/hive/execution/HiveDDLSuite.scala      | 15 +++++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
index a469d4d..9509b66 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala
@@ -23,7 +23,7 @@ import org.apache.hadoop.fs.{FileSystem, Path}
 
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
-import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable}
+import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable, 
SimpleCatalogRelation}
 
 
 /**
@@ -41,7 +41,7 @@ case class AnalyzeTableCommand(tableName: String) extends 
RunnableCommand {
     val relation = 
EliminateSubqueryAliases(sessionState.catalog.lookupRelation(tableIdent))
 
     relation match {
-      case relation: CatalogRelation =>
+      case relation: CatalogRelation if 
!relation.isInstanceOf[SimpleCatalogRelation] =>
         val catalogTable: CatalogTable = relation.catalogTable
         // This method is mainly based on
         // 
org.apache.hadoop.hive.ql.stats.StatsUtils.getFileSizeForTable(HiveConf, Table)

http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index d70cae7..9565471 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -395,6 +395,17 @@ class DDLSuite extends QueryTest with SharedSQLContext 
with BeforeAndAfterEach {
     assert(catalog.getTableMetadata(tableIdent1) === expectedTable)
   }
 
+  test("Analyze in-memory cataloged tables(SimpleCatalogRelation)") {
+    withTable("tbl") {
+      sql("CREATE TABLE tbl(a INT, b INT) USING parquet")
+      val e = intercept[AnalysisException] {
+        sql("ANALYZE TABLE tbl COMPUTE STATISTICS")
+      }.getMessage
+      assert(e.contains("ANALYZE TABLE is only supported for Hive tables, " +
+        "but 'tbl' is a SimpleCatalogRelation"))
+    }
+  }
+
   test("create table using") {
     val catalog = spark.sessionState.catalog
     withTable("tbl") {

http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 9228242..df6cd56 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -594,6 +594,21 @@ class HiveDDLSuite
     }
   }
 
+  test("Analyze data source tables(LogicalRelation)") {
+    withTable("t1") {
+      withTempPath { dir =>
+        val path = dir.getCanonicalPath
+        spark.range(1).write.format("parquet").save(path)
+        sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')")
+        val e = intercept[AnalysisException] {
+          sql("ANALYZE TABLE t1 COMPUTE STATISTICS")
+        }.getMessage
+        assert(e.contains("ANALYZE TABLE is only supported for Hive tables, " +
+          "but 't1' is a LogicalRelation"))
+      }
+    }
+  }
+
   test("desc table for data source table") {
     withTable("tab1") {
       val tabName = "tab1"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-17167][2.0][SQL] Issue Exceptions when Analyze Table on In-Memory Cataloged Tables

Reply via email to