Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19470#discussion_r144458108
--- Diff:
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
---
@@ -2050,4 +2050,80 @@ class SQLQuerySuite extends QueryTest with
SQLTestUtils with TestHiveSingleton {
}
}
}
+
+ test("SPARK-18355 Use Spark schema to read ORC table instead of ORC file
schema") {
+ val client =
spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+
+ Seq("true", "false").foreach { value =>
+ withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) {
+ withTempDatabase { db =>
+ client.runSqlHive(
+ s"""
+ |CREATE TABLE $db.t(
+ | click_id string,
+ | search_id string,
+ | uid bigint)
+ |PARTITIONED BY (
+ | ts string,
+ | hour string)
+ |STORED AS ORC
+ """.stripMargin)
+
+ client.runSqlHive(
+ s"""
+ |INSERT INTO TABLE $db.t
+ |PARTITION (ts = '98765', hour = '01')
+ |VALUES (12, 2, 12345)
+ """.stripMargin
+ )
+
+ checkAnswer(
+ sql(s"SELECT * FROM $db.t"),
+ Row("12", "2", 12345, "98765", "01"))
+
+ client.runSqlHive(s"ALTER TABLE $db.t ADD COLUMNS (dummy
string)")
+
+ checkAnswer(
+ sql(s"SELECT click_id, search_id FROM $db.t"),
+ Row("12", "2"))
+
+ checkAnswer(
+ sql(s"SELECT search_id, click_id FROM $db.t"),
+ Row("2", "12"))
+
+ checkAnswer(
+ sql(s"SELECT search_id FROM $db.t"),
+ Row("2"))
+
+ checkAnswer(
+ sql(s"SELECT dummy, click_id FROM $db.t"),
+ Row(null, "12"))
+
+ checkAnswer(
+ sql(s"SELECT * FROM $db.t"),
+ Row("12", "2", 12345, null, "98765", "01"))
+ }
+ }
+ }
+ }
+
+ // This test case is added to prevent regression.
+ test("SPARK-22267 Spark SQL incorrectly reads ORC files when column
order is different") {
--- End diff --
It's weird to have a test verifying a bug; I think it's good enough to have
a JIRA ticket tracking this bug.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]