[GitHub] spark pull request #16626: [SPARK-19261][SQL] Alter add columns for Hive ser...

xwu0226 Mon, 20 Mar 2017 18:09:15 -0700

Github user xwu0226 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16626#discussion_r107053921
  
    --- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
---
    @@ -2178,4 +2178,138 @@ abstract class DDLSuite extends QueryTest with 
SQLTestUtils {
           }
         }
       }
    +
    +  val supportedNativeFileFormatsForAlterTableAddColumns = Seq("parquet", 
"json", "csv")
    +
    +  supportedNativeFileFormatsForAlterTableAddColumns.foreach { provider =>
    +    test(s"alter datasource table add columns - $provider") {
    +      withTable("t1") {
    +        sql(s"CREATE TABLE t1 (c1 int) USING $provider")
    +        sql("INSERT INTO t1 VALUES (1)")
    +        sql("ALTER TABLE t1 ADD COLUMNS (c2 int)")
    +        checkAnswer(
    +          spark.table("t1"),
    +          Seq(Row(1, null))
    +        )
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c2 is null"),
    +          Seq(Row(1, null))
    +        )
    +
    +        sql("INSERT INTO t1 VALUES (3, 2)")
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c2 = 2"),
    +          Seq(Row(3, 2))
    +        )
    +      }
    +    }
    +  }
    +
    +  supportedNativeFileFormatsForAlterTableAddColumns.foreach { provider =>
    +    test(s"alter datasource table add columns - partitioned - $provider") {
    +      withTable("t1") {
    +        sql(s"CREATE TABLE t1 (c1 int, c2 int) USING $provider PARTITIONED 
BY (c2)")
    +        sql("INSERT INTO t1 PARTITION(c2 = 2) VALUES (1)")
    +        sql("ALTER TABLE t1 ADD COLUMNS (c3 int)")
    +        checkAnswer(
    +          spark.table("t1"),
    +          Seq(Row(1, null, 2))
    +        )
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c3 is null"),
    +          Seq(Row(1, null, 2))
    +        )
    +        sql("INSERT INTO t1 PARTITION(c2 =1) VALUES (2, 3)")
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c3 = 3"),
    +          Seq(Row(2, 3, 1))
    +        )
    +        checkAnswer(
    +          sql("SELECT * FROM t1 WHERE c2 = 1"),
    +          Seq(Row(2, 3, 1))
    +        )
    +      }
    +    }
    +  }
    +
    +  test("alter datasource table add columns - text format not supported") {
    +    withTable("t1") {
    +      sql("CREATE TABLE t1 (c1 int) USING text")
    +      val e = intercept[AnalysisException] {
    +        sql("ALTER TABLE t1 ADD COLUMNS (c2 int)")
    +      }.getMessage
    +      assert(e.contains("ALTER ADD COLUMNS does not support datasource 
table with type"))
    +    }
    +  }
    +
    +  test("alter table add columns -- not support temp view") {
    +    withTempView("tmp_v") {
    +      sql("CREATE TEMPORARY VIEW tmp_v AS SELECT 1 AS c1, 2 AS c2")
    +      val e = intercept[AnalysisException] {
    +        sql("ALTER TABLE tmp_v ADD COLUMNS (c3 INT)")
    +      }
    +      assert(e.message.contains("ALTER ADD COLUMNS does not support 
views"))
    +    }
    +  }
    +
    +  test("alter table add columns -- not support view") {
    +    withView("v1") {
    +      sql("CREATE VIEW v1 AS SELECT 1 AS c1, 2 AS c2")
    +      val e = intercept[AnalysisException] {
    +        sql("ALTER TABLE v1 ADD COLUMNS (c3 INT)")
    +      }
    +      assert(e.message.contains("ALTER ADD COLUMNS does not support 
views"))
    +    }
    +  }
    +
    +  test("alter table add columns with existing column name") {
    +    withTable("t1") {
    +      sql("CREATE TABLE t1 (c1 int) USING PARQUET")
    +      val e = intercept[AnalysisException] {
    +        sql("ALTER TABLE t1 ADD COLUMNS (c1 string)")
    +      }.getMessage
    +      assert(e.contains("Found duplicate column(s)"))
    +    }
    +  }
    +
    +  test("alter table add columns to table referenced by a view") {
    +    withTable("t1") {
    +      withView("v1") {
    +        sql("CREATE TABLE t1 (c1 int, c2 int) USING PARQUET")
    +        sql("CREATE VIEW v1 AS SELECT * FROM t1")
    +        val originViewSchema = sql("SELECT * FROM v1").schema
    +        sql("ALTER TABLE t1 ADD COLUMNS (c3 int)")
    +        assert(sql("SELECT * FROM v1").schema == originViewSchema)
    +      }
    +    }
    +  }
    +
    +  Seq("true", "false").foreach { caseSensitive =>
    +    test(s"alter table add columns with existing column name - 
caseSensitive $caseSensitive") {
    +      withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) {
    +        withTable("t1") {
    +          sql("CREATE TABLE t1 (c1 int) USING PARQUET")
    +          if (caseSensitive == "false") {
    +            val e = intercept[AnalysisException] {
    +              sql("ALTER TABLE t1 ADD COLUMNS (C1 string)")
    +            }.getMessage
    +            assert(e.contains("Found duplicate column(s)"))
    +          } else {
    +            if (isUsingHiveMetastore) {
    +              // hive catalog will still complains that c1 is duplicate 
column name because hive
    +              // identifiers are case insensitive.
    --- End diff --
    
    @cloud-fan I just tested a data source table, e.g. `create table t1 (c1 
int, C1 int) using parquet` with `spark.sql.caseSensitive = true`. Spark SQL 
does not complain; the exception from Hive bounces back but is only logged as a 
WARN message. The table was created successfully, and I am able to insert 
and select. But if I create a Hive serde table with `create table t2 (c1 int, 
C1 int) stored as parquet`, Hive complains and fails to create the table. 
So for the data source case, should we fix anything regarding the WARN message? 
Thanks!



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #16626: [SPARK-19261][SQL] Alter add columns for Hive ser...

Reply via email to