Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16626#discussion_r106786173
  
    --- Diff: 
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala 
---
    @@ -1860,4 +1861,119 @@ class HiveDDLSuite
           }
         }
       }
    +
    +  hiveFormats.foreach { tableType =>
    +    test(s"alter hive serde table add columns -- partitioned - 
$tableType") {
    +      withTable("tab") {
    +        sql(
    +          s"""
    +             |CREATE TABLE tab (c1 int, c2 int)
    +             |PARTITIONED BY (c3 int) STORED AS $tableType
    +          """.stripMargin)
    +
    +        sql("INSERT INTO tab PARTITION (c3=1) VALUES (1, 2)")
    +        sql("ALTER TABLE tab ADD COLUMNS (c4 int)")
    +        checkAnswer(
    +          sql("SELECT * FROM tab WHERE c3 = 1"),
    +          Seq(Row(1, 2, null, 1))
    +        )
    +        assert(sql("SELECT * FROM tab").schema
    +          .contains(StructField("c4", IntegerType)))
    +        sql("INSERT INTO tab PARTITION (c3=2) VALUES (2, 3, 4)")
    +        checkAnswer(
    +          sql("SELECT * FROM tab"),
    +          Seq(Row(1, 2, null, 1), Row(2, 3, 4, 2))
    +        )
    +        checkAnswer(
    +          sql("SELECT * FROM tab WHERE c3 = 2 AND c4 IS NOT NULL"),
    +          Seq(Row(2, 3, 4, 2))
    +        )
    +      }
    +    }
    +  }
    +
    +  hiveFormats.foreach { tableType =>
    +    test(s"alter hive serde table add columns -- with predicate - 
$tableType ") {
    +      withTable("tab") {
    +        sql(s"CREATE TABLE tab (c1 int, c2 int) STORED AS $tableType")
    +        sql("INSERT INTO tab VALUES (1, 2)")
    +        sql("ALTER TABLE tab ADD COLUMNS (c4 int)")
    +        checkAnswer(
    +          sql("SELECT * FROM tab WHERE c4 IS NULL"),
    +          Seq(Row(1, 2, null))
    +        )
    +        assert(sql("SELECT * FROM tab").schema
    +          .contains(StructField("c4", IntegerType)))
    +        sql("INSERT INTO tab VALUES (2, 3, 4)")
    +        checkAnswer(
    +          sql("SELECT * FROM tab WHERE c4 = 4 "),
    +          Seq(Row(2, 3, 4))
    +        )
    +        checkAnswer(
    +          sql("SELECT * FROM tab"),
    +          Seq(Row(1, 2, null), Row(2, 3, 4))
    +        )
    +      }
    +    }
    +  }
    +
    +  Seq("orc", "ORC", "org.apache.spark.sql.hive.orc",
    +    "org.apache.spark.sql.hive.orc.DefaultSource").foreach { source =>
    +    test(s"alter datasource table add columns - $source format not 
supported") {
    +      withTable("tab") {
    +        sql(s"CREATE TABLE tab (c1 int) USING $source")
    +        val e = intercept[AnalysisException] {
    +          sql("ALTER TABLE tab ADD COLUMNS (c2 int)")
    +        }.getMessage
    +        assert(
    +          e.contains(s"ALTER ADD COLUMNS does not support datasource table 
with type"))
    +      }
    +    }
    +  }
    +
    +  Seq("true", "false").foreach { caseSensitive =>
    +    test(s"alter add columns with existing partition column name - 
caseSensitive $caseSensitive") {
    +      withSQLConf(("spark.sql.caseSensitive", caseSensitive)) {
    +        withTable("tab") {
    +          sql("CREATE TABLE tab (c1 int) PARTITIONED BY (c2 int) STORED AS 
PARQUET")
    +          if (caseSensitive == "false") {
    +            val e = intercept[AnalysisException] {
    +              sql("ALTER TABLE tab ADD COLUMNS (C2 string)")
    +            }.getMessage
    +            assert(e.contains("Found duplicate column(s)"))
    +          } else {
    +            // hive catalog will still complain that c2 is a duplicate
    +            // column name because hive identifiers are case insensitive.
    +            val e = intercept[AnalysisException] {
    +              sql("ALTER TABLE tab ADD COLUMNS (C2 string)")
    +            }.getMessage
    +            assert(e.contains("HiveException"))
    +          }
    +        }
    +      }
    +    }
    +  }
    +
    +  Seq("true", "false").foreach { caseSensitive =>
    +    test(s"alter add columns with existing column name - caseSensitive 
$caseSensitive") {
    +      withSQLConf(("spark.sql.caseSensitive", caseSensitive)) {
    +        withTable("t1") {
    +          sql("CREATE TABLE t1 (c1 int) USING PARQUET")
    +          if (caseSensitive == "false") {
    +            val e = intercept[AnalysisException] {
    +              sql("ALTER TABLE t1 ADD COLUMNS (C1 string)")
    +            }.getMessage
    +            assert(e.contains("Found duplicate column(s)"))
    +          } else {
    +            // hive catalog will still complain that c1 is a duplicate
    +            // column name because hive identifiers are case insensitive.
    +            val e = intercept[AnalysisException] {
    +              sql("ALTER TABLE t1 ADD COLUMNS (C1 string)")
    +            }.getMessage
    +            assert(e.contains("HiveException"))
    +          }
    +        }
    +      }
    +    }
    +  }
    --- End diff --
    
    You can still combine it with the one in `InMemoryCatalogedDDLSuite` by 
using `isUsingHiveMetastore`.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to