Repository: spark Updated Branches: refs/heads/branch-2.0 48ecf3d00 -> 2b32a442d
[SPARK-17167][2.0][SQL] Issue Exceptions when Analyze Table on In-Memory Cataloged Tables ### What changes were proposed in this pull request? Currently, `ANALYZE TABLE` is only supported for Hive-serde tables. We should issue exceptions in all other cases. When the table is a data source table, we already issue an exception. However, when the table is an in-memory cataloged table, we do not issue any exception. This PR issues an exception when the table is in-memory cataloged. For example, ```SQL CREATE TABLE tbl(a INT, b INT) USING parquet ``` `tbl` is a `SimpleCatalogRelation` when Hive support is not enabled. ### How was this patch tested? Added two test cases. One of them only improves the test coverage for the case where the analyzed table is a data source table. Author: gatorsmile <[email protected]> Closes #14781 from gatorsmile/analyzeInMemoryTable2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b32a442 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b32a442 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b32a442 Branch: refs/heads/branch-2.0 Commit: 2b32a442dfbc8494c30dcb2f6869c9dc7f258ada Parents: 48ecf3d Author: gatorsmile <[email protected]> Authored: Thu Aug 25 14:38:41 2016 +0200 Committer: Herman van Hovell <[email protected]> Committed: Thu Aug 25 14:38:41 2016 +0200 ---------------------------------------------------------------------- .../sql/execution/command/AnalyzeTableCommand.scala | 4 ++-- .../spark/sql/execution/command/DDLSuite.scala | 11 +++++++++++ .../spark/sql/hive/execution/HiveDDLSuite.scala | 15 +++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala ---------------------------------------------------------------------- diff 
--git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala index a469d4d..9509b66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala @@ -23,7 +23,7 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable} +import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable, SimpleCatalogRelation} /** @@ -41,7 +41,7 @@ case class AnalyzeTableCommand(tableName: String) extends RunnableCommand { val relation = EliminateSubqueryAliases(sessionState.catalog.lookupRelation(tableIdent)) relation match { - case relation: CatalogRelation => + case relation: CatalogRelation if !relation.isInstanceOf[SimpleCatalogRelation] => val catalogTable: CatalogTable = relation.catalogTable // This method is mainly based on // org.apache.hadoop.hive.ql.stats.StatsUtils.getFileSizeForTable(HiveConf, Table) http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index d70cae7..9565471 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -395,6 +395,17 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { 
assert(catalog.getTableMetadata(tableIdent1) === expectedTable) } + test("Analyze in-memory cataloged tables(SimpleCatalogRelation)") { + withTable("tbl") { + sql("CREATE TABLE tbl(a INT, b INT) USING parquet") + val e = intercept[AnalysisException] { + sql("ANALYZE TABLE tbl COMPUTE STATISTICS") + }.getMessage + assert(e.contains("ANALYZE TABLE is only supported for Hive tables, " + + "but 'tbl' is a SimpleCatalogRelation")) + } + } + test("create table using") { val catalog = spark.sessionState.catalog withTable("tbl") { http://git-wip-us.apache.org/repos/asf/spark/blob/2b32a442/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 9228242..df6cd56 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -594,6 +594,21 @@ class HiveDDLSuite } } + test("Analyze data source tables(LogicalRelation)") { + withTable("t1") { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.range(1).write.format("parquet").save(path) + sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')") + val e = intercept[AnalysisException] { + sql("ANALYZE TABLE t1 COMPUTE STATISTICS") + }.getMessage + assert(e.contains("ANALYZE TABLE is only supported for Hive tables, " + + "but 't1' is a LogicalRelation")) + } + } + } + test("desc table for data source table") { withTable("tab1") { val tabName = "tab1" --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
