Repository: spark Updated Branches: refs/heads/master 8faa5217b -> 3a3c9ffbd
[SPARK-17518][SQL] Block Users to Specify the Internal Data Source Provider Hive ### What changes were proposed in this pull request? In Spark 2.1, we introduced a new internal provider `hive` for telling Hive serde tables from data source tables. This PR is to block users to specify this in `DataFrameWriter` and SQL APIs. ### How was this patch tested? Added a test case Author: gatorsmile <gatorsm...@gmail.com> Closes #15073 from gatorsmile/formatHive. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a3c9ffb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a3c9ffb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a3c9ffb Branch: refs/heads/master Commit: 3a3c9ffbd282244407e9437c2b02ae7e062dd183 Parents: 8faa521 Author: gatorsmile <gatorsm...@gmail.com> Authored: Sun Sep 18 15:37:15 2016 +0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Sun Sep 18 15:37:15 2016 +0800 ---------------------------------------------------------------------- .../org/apache/spark/sql/DataFrameWriter.scala | 3 ++ .../spark/sql/execution/SparkSqlParser.scala | 5 +- .../apache/spark/sql/internal/CatalogImpl.scala | 2 +- .../spark/sql/internal/CatalogSuite.scala | 7 +++ .../apache/spark/sql/hive/HiveStrategies.scala | 2 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 51 ++++++++++++++++++++ 6 files changed, 67 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index e137f07..64d3422 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -357,6 +357,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } private def saveAsTable(tableIdent: TableIdentifier): Unit = { + if (source.toLowerCase == "hive") { + throw new AnalysisException("Cannot create hive serde table with saveAsTable API") + } val tableExists = df.sparkSession.sessionState.catalog.tableExists(tableIdent) http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 7ba1a9f..5359ced 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -22,7 +22,7 @@ import scala.collection.JavaConverters._ import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.tree.TerminalNode -import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.parser._ @@ -316,6 +316,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { } val options = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty) val provider = ctx.tableProvider.qualifiedName.getText + if (provider.toLowerCase == "hive") { + throw new AnalysisException("Cannot create hive serde table with CREATE TABLE USING") + } val schema = Option(ctx.colTypeList()).map(createStructType) val partitionColumnNames = Option(ctx.partitionColumnNames) http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 1f87f0e..78ad710 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -258,7 +258,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { source: String, schema: StructType, options: Map[String, String]): DataFrame = { - if (source == "hive") { + if (source.toLowerCase == "hive") { throw new AnalysisException("Cannot create hive serde table with createExternalTable API.") } http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala index b221eed..549fd63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala @@ -322,6 +322,13 @@ class CatalogSuite assert(e2.message == "Cannot create a file-based external data source table without path") } + test("createExternalTable should fail if provider is hive") { + val e = intercept[AnalysisException] { + spark.catalog.createExternalTable("tbl", "HiVe", Map.empty[String, String]) + } + assert(e.message.contains("Cannot create hive serde table with createExternalTable API")) + } + // TODO: add tests for the rest of them } http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index fb11c84..9d29309 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -61,7 +61,7 @@ private[hive] trait HiveStrategies { // `ErrorIfExists` mode, and `DataFrameWriter.saveAsTable` doesn't support hive serde // tables yet. if (mode == SaveMode.Append || mode == SaveMode.Overwrite) { - throw new AnalysisException("" + + throw new AnalysisException( "CTAS for hive serde tables does not support append or overwrite semantics.") } http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 3466733..0f331ba 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.hive.HiveExternalCatalog._ import org.apache.spark.sql.hive.client.HiveClient @@ -1151,6 +1152,56 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv } } + test("save API - format hive") { + withTempDir { dir => + val path = dir.getCanonicalPath + val e = intercept[ClassNotFoundException] { + spark.range(10).write.format("hive").mode(SaveMode.Ignore).save(path) + }.getMessage + assert(e.contains("Failed to find data source: hive")) + } + } + + test("saveAsTable API - format hive") { + val tableName = "tab1" + withTable(tableName) { + val e = intercept[AnalysisException] { + spark.range(10).write.format("hive").mode(SaveMode.Overwrite).saveAsTable(tableName) + }.getMessage + assert(e.contains("Cannot create hive serde table with saveAsTable API")) + } + } + + test("create a data source table using hive") { + val tableName = "tab1" + withTable (tableName) { + val e = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE $tableName + |(col1 int) + |USING hive + """.stripMargin) + }.getMessage + assert(e.contains("Cannot create hive serde table with CREATE TABLE USING")) + } + } + + test("create a temp view using hive") { + val tableName = "tab1" + withTable (tableName) { + val e = intercept[ClassNotFoundException] { + sql( + s""" + |CREATE TEMPORARY VIEW $tableName + |(col1 int) + |USING hive + """.stripMargin) + }.getMessage + assert(e.contains("Failed to find data source: hive")) + } + } + test("saveAsTable - source and target are the same table") { val tableName = "tab1" withTable(tableName) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org