spark git commit: [SPARK-21929][SQL] Support `ALTER TABLE table_name ADD COLUMNS(..)` for ORC data source

lixiao Sat, 21 Oct 2017 18:02:48 -0700

Repository: spark
Updated Branches:
  refs/heads/master ff8de99a1 -> ca2a780e7



[SPARK-21929][SQL] Support `ALTER TABLE table_name ADD COLUMNS(..)` for ORC 
data source

## What changes were proposed in this pull request?

When [SPARK-19261](https://issues.apache.org/jira/browse/SPARK-19261) 
implements `ALTER TABLE ADD COLUMNS`, ORC data source is omitted due to 
SPARK-14387, SPARK-16628, and SPARK-18355. Now, those issues are fixed and 
Spark 2.3 is [using Spark schema to read ORC table instead of ORC file 
schema](https://github.com/apache/spark/commit/e6e36004afc3f9fc8abea98542248e9de11b4435).
 This PR enables `ALTER TABLE ADD COLUMNS` for ORC data source.

## How was this patch tested?

Pass the updated and added test cases.

Author: Dongjoon Hyun <[email protected]>

Closes #19545 from dongjoon-hyun/SPARK-21929.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca2a780e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca2a780e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca2a780e

Branch: refs/heads/master
Commit: ca2a780e7c4c4df2488ef933241c6e65264f8d3c
Parents: ff8de99
Author: Dongjoon Hyun <[email protected]>
Authored: Sat Oct 21 18:01:45 2017 -0700
Committer: gatorsmile <[email protected]>
Committed: Sat Oct 21 18:01:45 2017 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    |  3 +-
 .../spark/sql/execution/command/DDLSuite.scala  | 90 +++++++++++---------
 .../spark/sql/hive/execution/HiveDDLSuite.scala |  8 ++
 3 files changed, 58 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ca2a780e/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 8d95ca6..38f9163 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -235,11 +235,10 @@ case class AlterTableAddColumnsCommand(
       DataSource.lookupDataSource(catalogTable.provider.get).newInstance() 
match {
         // For datasource table, this command can only support the following 
File format.
         // TextFileFormat only default to one column "value"
-        // OrcFileFormat can not handle difference between user-specified 
schema and
-        // inferred schema yet. TODO, once this issue is resolved , we can add 
Orc back.
         // Hive type is already considered as hive serde table, so the logic 
will not
         // come in here.
         case _: JsonFileFormat | _: CSVFileFormat | _: ParquetFileFormat =>
+        case s if s.getClass.getCanonicalName.endsWith("OrcFileFormat") =>
         case s =>
           throw new AnalysisException(
             s"""

http://git-wip-us.apache.org/repos/asf/spark/blob/ca2a780e/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 4ed2cec..21a2c62 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -2202,56 +2202,64 @@ abstract class DDLSuite extends QueryTest with 
SQLTestUtils {
     }
   }
 
+  protected def testAddColumn(provider: String): Unit = {
+    withTable("t1") {
+      sql(s"CREATE TABLE t1 (c1 int) USING $provider")
+      sql("INSERT INTO t1 VALUES (1)")
+      sql("ALTER TABLE t1 ADD COLUMNS (c2 int)")
+      checkAnswer(
+        spark.table("t1"),
+        Seq(Row(1, null))
+      )
+      checkAnswer(
+        sql("SELECT * FROM t1 WHERE c2 is null"),
+        Seq(Row(1, null))
+      )
+
+      sql("INSERT INTO t1 VALUES (3, 2)")
+      checkAnswer(
+        sql("SELECT * FROM t1 WHERE c2 = 2"),
+        Seq(Row(3, 2))
+      )
+    }
+  }
+
+  protected def testAddColumnPartitioned(provider: String): Unit = {
+    withTable("t1") {
+      sql(s"CREATE TABLE t1 (c1 int, c2 int) USING $provider PARTITIONED BY 
(c2)")
+      sql("INSERT INTO t1 PARTITION(c2 = 2) VALUES (1)")
+      sql("ALTER TABLE t1 ADD COLUMNS (c3 int)")
+      checkAnswer(
+        spark.table("t1"),
+        Seq(Row(1, null, 2))
+      )
+      checkAnswer(
+        sql("SELECT * FROM t1 WHERE c3 is null"),
+        Seq(Row(1, null, 2))
+      )
+      sql("INSERT INTO t1 PARTITION(c2 =1) VALUES (2, 3)")
+      checkAnswer(
+        sql("SELECT * FROM t1 WHERE c3 = 3"),
+        Seq(Row(2, 3, 1))
+      )
+      checkAnswer(
+        sql("SELECT * FROM t1 WHERE c2 = 1"),
+        Seq(Row(2, 3, 1))
+      )
+    }
+  }
+
   val supportedNativeFileFormatsForAlterTableAddColumns = Seq("parquet", 
"json", "csv")
 
   supportedNativeFileFormatsForAlterTableAddColumns.foreach { provider =>
     test(s"alter datasource table add columns - $provider") {
-      withTable("t1") {
-        sql(s"CREATE TABLE t1 (c1 int) USING $provider")
-        sql("INSERT INTO t1 VALUES (1)")
-        sql("ALTER TABLE t1 ADD COLUMNS (c2 int)")
-        checkAnswer(
-          spark.table("t1"),
-          Seq(Row(1, null))
-        )
-        checkAnswer(
-          sql("SELECT * FROM t1 WHERE c2 is null"),
-          Seq(Row(1, null))
-        )
-
-        sql("INSERT INTO t1 VALUES (3, 2)")
-        checkAnswer(
-          sql("SELECT * FROM t1 WHERE c2 = 2"),
-          Seq(Row(3, 2))
-        )
-      }
+      testAddColumn(provider)
     }
   }
 
   supportedNativeFileFormatsForAlterTableAddColumns.foreach { provider =>
     test(s"alter datasource table add columns - partitioned - $provider") {
-      withTable("t1") {
-        sql(s"CREATE TABLE t1 (c1 int, c2 int) USING $provider PARTITIONED BY 
(c2)")
-        sql("INSERT INTO t1 PARTITION(c2 = 2) VALUES (1)")
-        sql("ALTER TABLE t1 ADD COLUMNS (c3 int)")
-        checkAnswer(
-          spark.table("t1"),
-          Seq(Row(1, null, 2))
-        )
-        checkAnswer(
-          sql("SELECT * FROM t1 WHERE c3 is null"),
-          Seq(Row(1, null, 2))
-        )
-        sql("INSERT INTO t1 PARTITION(c2 =1) VALUES (2, 3)")
-        checkAnswer(
-          sql("SELECT * FROM t1 WHERE c3 = 3"),
-          Seq(Row(2, 3, 1))
-        )
-        checkAnswer(
-          sql("SELECT * FROM t1 WHERE c2 = 1"),
-          Seq(Row(2, 3, 1))
-        )
-      }
+      testAddColumnPartitioned(provider)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ca2a780e/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 02e26bb..d3465a6 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -166,6 +166,14 @@ class HiveCatalogedDDLSuite extends DDLSuite with 
TestHiveSingleton with BeforeA
   test("drop table") {
     testDropTable(isDatasourceTable = false)
   }
+
+  test("alter datasource table add columns - orc") {
+    testAddColumn("orc")
+  }
+
+  test("alter datasource table add columns - partitioned - orc") {
+    testAddColumnPartitioned("orc")
+  }
 }
 
 class HiveDDLSuite


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-21929][SQL] Support `ALTER TABLE table_name ADD COLUMNS(..)` for ORC data source

Reply via email to