This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4c7edd2a2048 [SPARK-48864][SQL][TESTS] Refactor `HiveQuerySuite` and
fix bug
4c7edd2a2048 is described below
commit 4c7edd2a20480a8521fcc88a966b22619143aebd
Author: panbingkun <[email protected]>
AuthorDate: Fri Jul 12 15:22:34 2024 +0800
[SPARK-48864][SQL][TESTS] Refactor `HiveQuerySuite` and fix bug
### What changes were proposed in this pull request?
The pr aims to refactor `HiveQuerySuite` and `fix` bug, includes:
- use `getWorkspaceFilePath` to enable `HiveQuerySuite` to run successfully
in the IDE.
- make the test `lookup hive UDF in another thread` `independence`, without
relying on the previous UT `current_database with multiple sessions`.
- enable two test: `non-boolean conditions in a CaseWhen are illegal` and
`Dynamic partition folder layout`.
### Why are the changes needed?
- Run successfully in the `IDE`
Before:
<img width="1288" alt="image"
src="https://github.com/apache/spark/assets/15246973/005fd49c-3edf-4e51-8223-097fd7a485bf">
After:
<img width="1276" alt="image"
src="https://github.com/apache/spark/assets/15246973/caedec72-be0c-4bb5-bc06-26cceef8b4b8">
- Make UT `lookup hive UDF in another thread` `independence`
when `only` running it, it actually failed with the following error:
<img width="1318" alt="image"
src="https://github.com/apache/spark/assets/15246973/ef9c260f-8c0d-4821-8233-d4d7ae13802a">
**why ?**
Because the previous UT `current_database with multiple sessions`
changed `current database` and was not restored after it finished running.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- Manually test
- Pass GA.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47293 from panbingkun/refactor_HiveQuerySuite.
Authored-by: panbingkun <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
.../sql/hive/execution/HiveComparisonTest.scala | 5 +-
.../spark/sql/hive/execution/HiveQuerySuite.scala | 249 +++++++++++----------
2 files changed, 135 insertions(+), 119 deletions(-)
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index f0feccb4f494..87e58bb8fa13 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -100,8 +100,9 @@ abstract class HiveComparisonTest extends SparkFunSuite
with BeforeAndAfterAll {
.map(name => new File(targetDir, s"$suiteName.$name"))
/** The local directory with cached golden answer will be stored. */
- protected val answerCache = new File("src" + File.separator + "test" +
- File.separator + "resources" + File.separator + "golden")
+ protected val answerCache = getWorkspaceFilePath(
+ "sql", "hive", "src", "test", "resources", "golden").toFile
+
if (!answerCache.exists) {
answerCache.mkdir()
}
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 5ccb7f0d1f84..24d1e24b30c8 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -18,7 +18,6 @@
package org.apache.spark.sql.hive.execution
import java.io.File
-import java.net.URI
import java.nio.file.Files
import java.sql.Timestamp
@@ -679,15 +678,23 @@ class HiveQuerySuite extends HiveComparisonTest with
SQLTestUtils with BeforeAnd
assert(actual === expected)
}
- // TODO: adopt this test when Spark SQL has the functionality / framework to
report errors.
- // See https://github.com/apache/spark/pull/1055#issuecomment-45820167 for a
discussion.
- ignore("non-boolean conditions in a CaseWhen are illegal") {
+ test("non-boolean conditions in a CaseWhen are illegal") {
checkError(
exception = intercept[AnalysisException] {
sql("SELECT (CASE WHEN key > 2 THEN 3 WHEN 1 THEN 2 ELSE 0 END) FROM
src").collect()
},
- errorClass = null,
- parameters = Map.empty)
+ errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ parameters = Map(
+ "sqlExpr" -> "\"CASE WHEN (key > 2) THEN 3 WHEN 1 THEN 2 ELSE 0 END\"",
+ "paramIndex" -> "second",
+ "inputSql" -> "\"1\"",
+ "inputType" -> "\"INT\"",
+ "requiredType" -> "\"BOOLEAN\""),
+ context = ExpectedContext(
+ fragment = "CASE WHEN key > 2 THEN 3 WHEN 1 THEN 2 ELSE 0 END",
+ start = 8,
+ stop = 56)
+ )
}
createQueryTest("case sensitivity when query Hive table",
@@ -804,18 +811,19 @@ class HiveQuerySuite extends HiveComparisonTest with
SQLTestUtils with BeforeAnd
}
test("ADD JAR command") {
- sql("CREATE TABLE alter1(a INT, b INT) USING HIVE")
- checkError(
- exception = intercept[AnalysisException] {
- sql(
- """ALTER TABLE alter1 SET SERDE
'org.apache.hadoop.hive.serde2.TestSerDe'
- |WITH serdeproperties('s1'='9')""".stripMargin)
- },
- errorClass = "_LEGACY_ERROR_TEMP_3065",
- parameters = Map(
- "clazz" -> "org.apache.hadoop.hive.ql.metadata.HiveException",
- "msg" -> "at least one column must be specified for the table"))
- sql("DROP TABLE alter1")
+ withTable("alter1") {
+ sql("CREATE TABLE alter1(a INT, b INT) USING HIVE")
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ """ALTER TABLE alter1 SET SERDE
'org.apache.hadoop.hive.serde2.TestSerDe'
+ |WITH serdeproperties('s1'='9')""".stripMargin)
+ },
+ errorClass = "_LEGACY_ERROR_TEMP_3065",
+ parameters = Map(
+ "clazz" -> "org.apache.hadoop.hive.ql.metadata.HiveException",
+ "msg" -> "at least one column must be specified for the table"))
+ }
}
test("ADD JAR command 2") {
@@ -823,12 +831,13 @@ class HiveQuerySuite extends HiveComparisonTest with
SQLTestUtils with BeforeAnd
val testJar = HiveTestJars.getHiveHcatalogCoreJar().toURI
val testData = TestHive.getHiveFile("data/files/sample.json").toURI
sql(s"ADD JAR $testJar")
- sql(
- """CREATE TABLE t1(a string, b string)
- |ROW FORMAT SERDE
'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin)
- sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE t1""")
- sql("select * from src join t1 on src.key = t1.a")
- sql("DROP TABLE t1")
+ withTable("t1") {
+ sql(
+ """CREATE TABLE t1(a string, b string)
+ |ROW FORMAT SERDE
'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin)
+ sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE t1""")
+ sql("select * from src join t1 on src.key = t1.a")
+ }
assert(sql("list jars").
filter(_.getString(0).contains(HiveTestJars.getHiveHcatalogCoreJar().getName)).count()
> 0)
assert(sql("list jar").
@@ -854,12 +863,13 @@ class HiveQuerySuite extends HiveComparisonTest with
SQLTestUtils with BeforeAnd
val funcJar = TestHive.getHiveFile("TestUDTF.jar")
val jarURL = funcJar.toURI.toURL
sql(s"ADD JAR $jarURL")
- sql(
- """CREATE TEMPORARY FUNCTION udtf_count2 AS
- |'org.apache.spark.sql.hive.execution.GenericUDTFCount2'
- """.stripMargin)
- assert(sql("DESCRIBE FUNCTION udtf_count2").count() > 1)
- sql("DROP TEMPORARY FUNCTION udtf_count2")
+ withUserDefinedFunction("udtf_count2" -> true) {
+ sql(
+ """CREATE TEMPORARY FUNCTION udtf_count2 AS
+ |'org.apache.spark.sql.hive.execution.GenericUDTFCount2'
+ """.stripMargin)
+ assert(sql("DESCRIBE FUNCTION udtf_count2").count() > 1)
+ }
}
test("ADD FILE command") {
@@ -1167,43 +1177,48 @@ class HiveQuerySuite extends HiveComparisonTest with
SQLTestUtils with BeforeAnd
|DROP TABLE IF EXISTS dynamic_part_table;
""".stripMargin)
- ignore("Dynamic partition folder layout") {
- sql("DROP TABLE IF EXISTS dynamic_part_table")
- sql("CREATE TABLE dynamic_part_table(intcol INT) PARTITIONED BY (partcol1
INT, partcol2 INT)")
- sql("SET hive.exec.dynamic.partition.mode=nonstrict")
+ test("Dynamic partition folder layout") {
+ withTempDir { dir =>
+ withTable("dynamic_part_table") {
+ sql("CREATE TABLE dynamic_part_table(intcol INT) USING HIVE " +
+ s"PARTITIONED BY (partcol1 INT, partcol2 INT) " +
+ s"LOCATION '${dir.getCanonicalPath}/dynamic_part_table'")
+ sql("SET hive.exec.dynamic.partition.mode=nonstrict")
+
+ val data = Map(
+ Seq("1", "1") -> 1,
+ Seq("1", "NULL") -> 2,
+ Seq("NULL", "1") -> 3,
+ Seq("NULL", "NULL") -> 4)
+
+ data.foreach { case (parts, value) =>
+ sql(
+ s"""INSERT INTO TABLE dynamic_part_table PARTITION(partcol1,
partcol2)
+ |SELECT $value, ${parts.mkString(", ")} FROM src WHERE key=150
+ """.stripMargin)
+
+ val partFolder = Seq("partcol1", "partcol2")
+ .zip(parts)
+ .map { case (k, v) =>
+ if (v == "NULL") {
+ s"$k=${ConfVars.DEFAULTPARTITIONNAME.defaultStrVal}"
+ } else {
+ s"$k=$v"
+ }
+ }
+ .mkString("/")
- val data = Map(
- Seq("1", "1") -> 1,
- Seq("1", "NULL") -> 2,
- Seq("NULL", "1") -> 3,
- Seq("NULL", "NULL") -> 4)
+ // Loads partition data to a temporary table to verify contents
+ val path =
s"${dir.getCanonicalPath}/dynamic_part_table/$partFolder/part-00000*"
- data.foreach { case (parts, value) =>
- sql(
- s"""INSERT INTO TABLE dynamic_part_table PARTITION(partcol1, partcol2)
- |SELECT $value, ${parts.mkString(", ")} FROM src WHERE key=150
- """.stripMargin)
-
- val partFolder = Seq("partcol1", "partcol2")
- .zip(parts)
- .map { case (k, v) =>
- if (v == "NULL") {
- s"$k=${ConfVars.DEFAULTPARTITIONNAME.defaultStrVal}"
- } else {
- s"$k=$v"
+ withTable("dp_verify") {
+ sql("CREATE TABLE dp_verify(intcol INT) USING HIVE")
+ sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE dp_verify")
+
+ assert(sql("SELECT * FROM dp_verify").collect() ===
Array(Row(value)))
}
}
- .mkString("/")
-
- // Loads partition data to a temporary table to verify contents
- val warehousePathFile = new URI(sparkSession.getWarehousePath()).getPath
- val path =
s"$warehousePathFile/dynamic_part_table/$partFolder/part-00000"
-
- sql("DROP TABLE IF EXISTS dp_verify")
- sql("CREATE TABLE dp_verify(intcol INT)")
- sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE dp_verify")
-
- assert(sql("SELECT * FROM dp_verify").collect() === Array(Row(value)))
+ }
}
}
@@ -1334,69 +1349,69 @@ class HiveQuerySuite extends HiveComparisonTest with
SQLTestUtils with BeforeAnd
}
test("current_database with multiple sessions") {
- sql("create database a")
- sql("use a")
- val s2 = newSession()
- s2.sql("create database b")
- s2.sql("use b")
+ withCurrentCatalogAndNamespace {
+ sql("create database a")
+ sql("use a")
+ val s2 = newSession()
+ s2.sql("create database b")
+ s2.sql("use b")
- assert(sql("select current_database()").first() === Row("a"))
- assert(s2.sql("select current_database()").first() === Row("b"))
+ assert(sql("select current_database()").first() === Row("a"))
+ assert(s2.sql("select current_database()").first() === Row("b"))
- try {
- sql("create table test_a(key INT, value STRING)")
- s2.sql("create table test_b(key INT, value STRING)")
+ try {
+ sql("create table test_a(key INT, value STRING)")
+ s2.sql("create table test_b(key INT, value STRING)")
- sql("select * from test_a")
- checkError(
- exception = intercept[AnalysisException] {
- sql("select * from test_b")
- },
- errorClass = "TABLE_OR_VIEW_NOT_FOUND",
- parameters = Map("relationName" -> "`test_b`"),
- context = ExpectedContext(
- fragment = "test_b",
- start = 14,
- stop = 19))
+ sql("select * from test_a")
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("select * from test_b")
+ },
+ errorClass = "TABLE_OR_VIEW_NOT_FOUND",
+ parameters = Map("relationName" -> "`test_b`"),
+ context = ExpectedContext(
+ fragment = "test_b",
+ start = 14,
+ stop = 19))
- sql("select * from b.test_b")
+ sql("select * from b.test_b")
- s2.sql("select * from test_b")
- checkError(
- exception = intercept[AnalysisException] {
- s2.sql("select * from test_a")
- },
- errorClass = "TABLE_OR_VIEW_NOT_FOUND",
- parameters = Map("relationName" -> "`test_a`"),
- context = ExpectedContext(
- fragment = "test_a",
- start = 14,
- stop = 19))
- s2.sql("select * from a.test_a")
- } finally {
- sql("DROP TABLE IF EXISTS test_a")
- s2.sql("DROP TABLE IF EXISTS test_b")
+ s2.sql("select * from test_b")
+ checkError(
+ exception = intercept[AnalysisException] {
+ s2.sql("select * from test_a")
+ },
+ errorClass = "TABLE_OR_VIEW_NOT_FOUND",
+ parameters = Map("relationName" -> "`test_a`"),
+ context = ExpectedContext(
+ fragment = "test_a",
+ start = 14,
+ stop = 19))
+ s2.sql("select * from a.test_a")
+ } finally {
+ sql("DROP TABLE IF EXISTS test_a")
+ s2.sql("DROP TABLE IF EXISTS test_b")
+ }
}
-
}
test("use database") {
val currentDatabase = sql("select current_database()").first().getString(0)
+ withCurrentCatalogAndNamespace {
+ sql("CREATE DATABASE hive_test_db")
+ sql("USE hive_test_db")
+ assert("hive_test_db" == sql("select
current_database()").first().getString(0))
+ assert("hive_test_db" == sql("select
current_schema()").first().getString(0))
- sql("CREATE DATABASE hive_test_db")
- sql("USE hive_test_db")
- assert("hive_test_db" == sql("select
current_database()").first().getString(0))
-
- assert("hive_test_db" == sql("select
current_schema()").first().getString(0))
-
- checkError(
- exception = intercept[AnalysisException] {
- sql("USE not_existing_db")
- },
- errorClass = "SCHEMA_NOT_FOUND",
- parameters = Map("schemaName" -> "`spark_catalog`.`not_existing_db`"))
-
- sql(s"USE $currentDatabase")
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("USE not_existing_db")
+ },
+ errorClass = "SCHEMA_NOT_FOUND",
+ parameters = Map("schemaName" -> "`spark_catalog`.`not_existing_db`")
+ )
+ }
assert(currentDatabase == sql("select
current_database()").first().getString(0))
}
@@ -1409,7 +1424,7 @@ class HiveQuerySuite extends HiveComparisonTest with
SQLTestUtils with BeforeAnd
sqlState = None,
parameters = Map(
"routineName" -> "`not_a_udf`",
- "searchPath" -> "[`system`.`builtin`, `system`.`session`,
`spark_catalog`.`a`]"),
+ "searchPath" -> "[`system`.`builtin`, `system`.`session`,
`spark_catalog`.`default`]"),
context = ExpectedContext(
fragment = "not_a_udf()",
start = 0,
@@ -1426,7 +1441,7 @@ class HiveQuerySuite extends HiveComparisonTest with
SQLTestUtils with BeforeAnd
sqlState = None,
parameters = Map(
"routineName" -> "`not_a_udf`",
- "searchPath" -> "[`system`.`builtin`, `system`.`session`,
`spark_catalog`.`a`]"),
+ "searchPath" -> "[`system`.`builtin`, `system`.`session`,
`spark_catalog`.`default`]"),
context = ExpectedContext(
fragment = "not_a_udf()",
start = 0,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]