(spark) branch master updated: [SPARK-48864][SQL][TESTS] Refactor `HiveQuerySuite` and fix bug

yangjie01 Fri, 12 Jul 2024 00:24:15 -0700

This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 4c7edd2a2048 [SPARK-48864][SQL][TESTS] Refactor `HiveQuerySuite` and 
fix bug
4c7edd2a2048 is described below

commit 4c7edd2a20480a8521fcc88a966b22619143aebd
Author: panbingkun <[email protected]>
AuthorDate: Fri Jul 12 15:22:34 2024 +0800

    [SPARK-48864][SQL][TESTS] Refactor `HiveQuerySuite` and fix bug
    
    ### What changes were proposed in this pull request?
    The pr aims to refactor `HiveQuerySuite` and `fix` bug, includes:
    - use `getWorkspaceFilePath` to enable `HiveQuerySuite` to run successfully 
in the IDE.
    - make the test `lookup hive UDF in another thread` `independence`, without 
relying on the previous UT `current_database with multiple sessions`.
    - enable two test: `non-boolean conditions in a CaseWhen are illegal` and 
`Dynamic partition folder layout`.
    
    ### Why are the changes needed?
    - Run successfully in the `IDE`
      Before:
      <img width="1288" alt="image" 
src="https://github.com/apache/spark/assets/15246973/005fd49c-3edf-4e51-8223-097fd7a485bf";>
    
      After:
      <img width="1276" alt="image" 
src="https://github.com/apache/spark/assets/15246973/caedec72-be0c-4bb5-bc06-26cceef8b4b8";>
    
    - Make UT `lookup hive UDF in another thread` `independence`
      when `only` running it, it actually failed with the following error:
      <img width="1318" alt="image" 
src="https://github.com/apache/spark/assets/15246973/ef9c260f-8c0d-4821-8233-d4d7ae13802a";>
    
      **why ?**
      Because the previous UT `current_database with multiple sessions`  
changed `current database` and was not restored after it finished running.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    - Manually test
    - Pass GA.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #47293 from panbingkun/refactor_HiveQuerySuite.
    
    Authored-by: panbingkun <[email protected]>
    Signed-off-by: yangjie01 <[email protected]>
---
 .../sql/hive/execution/HiveComparisonTest.scala    |   5 +-
 .../spark/sql/hive/execution/HiveQuerySuite.scala  | 249 +++++++++++----------
 2 files changed, 135 insertions(+), 119 deletions(-)

diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index f0feccb4f494..87e58bb8fa13 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -100,8 +100,9 @@ abstract class HiveComparisonTest extends SparkFunSuite 
with BeforeAndAfterAll {
       .map(name => new File(targetDir, s"$suiteName.$name"))
 
   /** The local directory with cached golden answer will be stored. */
-  protected val answerCache = new File("src" + File.separator + "test" +
-    File.separator + "resources" + File.separator + "golden")
+  protected val answerCache = getWorkspaceFilePath(
+    "sql", "hive", "src", "test", "resources", "golden").toFile
+
   if (!answerCache.exists) {
     answerCache.mkdir()
   }
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 5ccb7f0d1f84..24d1e24b30c8 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.sql.hive.execution
 
 import java.io.File
-import java.net.URI
 import java.nio.file.Files
 import java.sql.Timestamp
 
@@ -679,15 +678,23 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
     assert(actual === expected)
   }
 
-  // TODO: adopt this test when Spark SQL has the functionality / framework to 
report errors.
-  // See https://github.com/apache/spark/pull/1055#issuecomment-45820167 for a 
discussion.
-  ignore("non-boolean conditions in a CaseWhen are illegal") {
+  test("non-boolean conditions in a CaseWhen are illegal") {
     checkError(
       exception = intercept[AnalysisException] {
         sql("SELECT (CASE WHEN key > 2 THEN 3 WHEN 1 THEN 2 ELSE 0 END) FROM 
src").collect()
       },
-      errorClass = null,
-      parameters = Map.empty)
+      errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+      parameters = Map(
+        "sqlExpr" -> "\"CASE WHEN (key > 2) THEN 3 WHEN 1 THEN 2 ELSE 0 END\"",
+        "paramIndex" -> "second",
+        "inputSql" -> "\"1\"",
+        "inputType" -> "\"INT\"",
+        "requiredType" -> "\"BOOLEAN\""),
+      context = ExpectedContext(
+        fragment = "CASE WHEN key > 2 THEN 3 WHEN 1 THEN 2 ELSE 0 END",
+        start = 8,
+        stop = 56)
+    )
   }
 
   createQueryTest("case sensitivity when query Hive table",
@@ -804,18 +811,19 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
   }
 
   test("ADD JAR command") {
-    sql("CREATE TABLE alter1(a INT, b INT) USING HIVE")
-    checkError(
-      exception = intercept[AnalysisException] {
-        sql(
-          """ALTER TABLE alter1 SET SERDE 
'org.apache.hadoop.hive.serde2.TestSerDe'
-            |WITH serdeproperties('s1'='9')""".stripMargin)
-      },
-      errorClass = "_LEGACY_ERROR_TEMP_3065",
-      parameters = Map(
-        "clazz" -> "org.apache.hadoop.hive.ql.metadata.HiveException",
-        "msg" -> "at least one column must be specified for the table"))
-    sql("DROP TABLE alter1")
+    withTable("alter1") {
+      sql("CREATE TABLE alter1(a INT, b INT) USING HIVE")
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql(
+            """ALTER TABLE alter1 SET SERDE 
'org.apache.hadoop.hive.serde2.TestSerDe'
+              |WITH serdeproperties('s1'='9')""".stripMargin)
+        },
+        errorClass = "_LEGACY_ERROR_TEMP_3065",
+        parameters = Map(
+          "clazz" -> "org.apache.hadoop.hive.ql.metadata.HiveException",
+          "msg" -> "at least one column must be specified for the table"))
+    }
   }
 
   test("ADD JAR command 2") {
@@ -823,12 +831,13 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
     val testJar = HiveTestJars.getHiveHcatalogCoreJar().toURI
     val testData = TestHive.getHiveFile("data/files/sample.json").toURI
     sql(s"ADD JAR $testJar")
-    sql(
-      """CREATE TABLE t1(a string, b string)
-      |ROW FORMAT SERDE 
'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin)
-    sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE t1""")
-    sql("select * from src join t1 on src.key = t1.a")
-    sql("DROP TABLE t1")
+    withTable("t1") {
+      sql(
+        """CREATE TABLE t1(a string, b string)
+          |ROW FORMAT SERDE 
'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin)
+      sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE t1""")
+      sql("select * from src join t1 on src.key = t1.a")
+    }
     assert(sql("list jars").
       
filter(_.getString(0).contains(HiveTestJars.getHiveHcatalogCoreJar().getName)).count()
 > 0)
     assert(sql("list jar").
@@ -854,12 +863,13 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
     val funcJar = TestHive.getHiveFile("TestUDTF.jar")
     val jarURL = funcJar.toURI.toURL
     sql(s"ADD JAR $jarURL")
-    sql(
-      """CREATE TEMPORARY FUNCTION udtf_count2 AS
-        |'org.apache.spark.sql.hive.execution.GenericUDTFCount2'
-      """.stripMargin)
-    assert(sql("DESCRIBE FUNCTION udtf_count2").count() > 1)
-    sql("DROP TEMPORARY FUNCTION udtf_count2")
+    withUserDefinedFunction("udtf_count2" -> true) {
+      sql(
+        """CREATE TEMPORARY FUNCTION udtf_count2 AS
+          |'org.apache.spark.sql.hive.execution.GenericUDTFCount2'
+        """.stripMargin)
+      assert(sql("DESCRIBE FUNCTION udtf_count2").count() > 1)
+    }
   }
 
   test("ADD FILE command") {
@@ -1167,43 +1177,48 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
       |DROP TABLE IF EXISTS dynamic_part_table;
     """.stripMargin)
 
-  ignore("Dynamic partition folder layout") {
-    sql("DROP TABLE IF EXISTS dynamic_part_table")
-    sql("CREATE TABLE dynamic_part_table(intcol INT) PARTITIONED BY (partcol1 
INT, partcol2 INT)")
-    sql("SET hive.exec.dynamic.partition.mode=nonstrict")
+  test("Dynamic partition folder layout") {
+    withTempDir { dir =>
+      withTable("dynamic_part_table") {
+        sql("CREATE TABLE dynamic_part_table(intcol INT) USING HIVE " +
+          s"PARTITIONED BY (partcol1 INT, partcol2 INT) " +
+          s"LOCATION '${dir.getCanonicalPath}/dynamic_part_table'")
+        sql("SET hive.exec.dynamic.partition.mode=nonstrict")
+
+        val data = Map(
+          Seq("1", "1") -> 1,
+          Seq("1", "NULL") -> 2,
+          Seq("NULL", "1") -> 3,
+          Seq("NULL", "NULL") -> 4)
+
+        data.foreach { case (parts, value) =>
+          sql(
+            s"""INSERT INTO TABLE dynamic_part_table PARTITION(partcol1, 
partcol2)
+               |SELECT $value, ${parts.mkString(", ")} FROM src WHERE key=150
+             """.stripMargin)
+
+          val partFolder = Seq("partcol1", "partcol2")
+            .zip(parts)
+            .map { case (k, v) =>
+              if (v == "NULL") {
+                s"$k=${ConfVars.DEFAULTPARTITIONNAME.defaultStrVal}"
+              } else {
+                s"$k=$v"
+              }
+            }
+            .mkString("/")
 
-    val data = Map(
-      Seq("1", "1") -> 1,
-      Seq("1", "NULL") -> 2,
-      Seq("NULL", "1") -> 3,
-      Seq("NULL", "NULL") -> 4)
+          // Loads partition data to a temporary table to verify contents
+          val path = 
s"${dir.getCanonicalPath}/dynamic_part_table/$partFolder/part-00000*"
 
-    data.foreach { case (parts, value) =>
-      sql(
-        s"""INSERT INTO TABLE dynamic_part_table PARTITION(partcol1, partcol2)
-           |SELECT $value, ${parts.mkString(", ")} FROM src WHERE key=150
-         """.stripMargin)
-
-      val partFolder = Seq("partcol1", "partcol2")
-        .zip(parts)
-        .map { case (k, v) =>
-          if (v == "NULL") {
-            s"$k=${ConfVars.DEFAULTPARTITIONNAME.defaultStrVal}"
-          } else {
-            s"$k=$v"
+          withTable("dp_verify") {
+            sql("CREATE TABLE dp_verify(intcol INT) USING HIVE")
+            sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE dp_verify")
+
+            assert(sql("SELECT * FROM dp_verify").collect() === 
Array(Row(value)))
           }
         }
-        .mkString("/")
-
-      // Loads partition data to a temporary table to verify contents
-      val warehousePathFile = new URI(sparkSession.getWarehousePath()).getPath
-      val path = 
s"$warehousePathFile/dynamic_part_table/$partFolder/part-00000"
-
-      sql("DROP TABLE IF EXISTS dp_verify")
-      sql("CREATE TABLE dp_verify(intcol INT)")
-      sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE dp_verify")
-
-      assert(sql("SELECT * FROM dp_verify").collect() === Array(Row(value)))
+      }
     }
   }
 
@@ -1334,69 +1349,69 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
   }
 
   test("current_database with multiple sessions") {
-    sql("create database a")
-    sql("use a")
-    val s2 = newSession()
-    s2.sql("create database b")
-    s2.sql("use b")
+    withCurrentCatalogAndNamespace {
+      sql("create database a")
+      sql("use a")
+      val s2 = newSession()
+      s2.sql("create database b")
+      s2.sql("use b")
 
-    assert(sql("select current_database()").first() === Row("a"))
-    assert(s2.sql("select current_database()").first() === Row("b"))
+      assert(sql("select current_database()").first() === Row("a"))
+      assert(s2.sql("select current_database()").first() === Row("b"))
 
-    try {
-      sql("create table test_a(key INT, value STRING)")
-      s2.sql("create table test_b(key INT, value STRING)")
+      try {
+        sql("create table test_a(key INT, value STRING)")
+        s2.sql("create table test_b(key INT, value STRING)")
 
-      sql("select * from test_a")
-      checkError(
-        exception = intercept[AnalysisException] {
-          sql("select * from test_b")
-        },
-        errorClass = "TABLE_OR_VIEW_NOT_FOUND",
-        parameters = Map("relationName" -> "`test_b`"),
-        context = ExpectedContext(
-          fragment = "test_b",
-          start = 14,
-          stop = 19))
+        sql("select * from test_a")
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql("select * from test_b")
+          },
+          errorClass = "TABLE_OR_VIEW_NOT_FOUND",
+          parameters = Map("relationName" -> "`test_b`"),
+          context = ExpectedContext(
+            fragment = "test_b",
+            start = 14,
+            stop = 19))
 
-      sql("select * from b.test_b")
+        sql("select * from b.test_b")
 
-      s2.sql("select * from test_b")
-      checkError(
-        exception = intercept[AnalysisException] {
-          s2.sql("select * from test_a")
-        },
-        errorClass = "TABLE_OR_VIEW_NOT_FOUND",
-        parameters = Map("relationName" -> "`test_a`"),
-        context = ExpectedContext(
-          fragment = "test_a",
-          start = 14,
-          stop = 19))
-      s2.sql("select * from a.test_a")
-    } finally {
-      sql("DROP TABLE IF EXISTS test_a")
-      s2.sql("DROP TABLE IF EXISTS test_b")
+        s2.sql("select * from test_b")
+        checkError(
+          exception = intercept[AnalysisException] {
+            s2.sql("select * from test_a")
+          },
+          errorClass = "TABLE_OR_VIEW_NOT_FOUND",
+          parameters = Map("relationName" -> "`test_a`"),
+          context = ExpectedContext(
+            fragment = "test_a",
+            start = 14,
+            stop = 19))
+        s2.sql("select * from a.test_a")
+      } finally {
+        sql("DROP TABLE IF EXISTS test_a")
+        s2.sql("DROP TABLE IF EXISTS test_b")
+      }
     }
-
   }
 
   test("use database") {
     val currentDatabase = sql("select current_database()").first().getString(0)
+    withCurrentCatalogAndNamespace {
+      sql("CREATE DATABASE hive_test_db")
+      sql("USE hive_test_db")
+      assert("hive_test_db" == sql("select 
current_database()").first().getString(0))
+      assert("hive_test_db" == sql("select 
current_schema()").first().getString(0))
 
-    sql("CREATE DATABASE hive_test_db")
-    sql("USE hive_test_db")
-    assert("hive_test_db" == sql("select 
current_database()").first().getString(0))
-
-    assert("hive_test_db" == sql("select 
current_schema()").first().getString(0))
-
-    checkError(
-      exception = intercept[AnalysisException] {
-        sql("USE not_existing_db")
-      },
-      errorClass = "SCHEMA_NOT_FOUND",
-      parameters = Map("schemaName" -> "`spark_catalog`.`not_existing_db`"))
-
-    sql(s"USE $currentDatabase")
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("USE not_existing_db")
+        },
+        errorClass = "SCHEMA_NOT_FOUND",
+        parameters = Map("schemaName" -> "`spark_catalog`.`not_existing_db`")
+      )
+    }
     assert(currentDatabase == sql("select 
current_database()").first().getString(0))
   }
 
@@ -1409,7 +1424,7 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
       sqlState = None,
       parameters = Map(
         "routineName" -> "`not_a_udf`",
-        "searchPath" -> "[`system`.`builtin`, `system`.`session`, 
`spark_catalog`.`a`]"),
+        "searchPath" -> "[`system`.`builtin`, `system`.`session`, 
`spark_catalog`.`default`]"),
       context = ExpectedContext(
         fragment = "not_a_udf()",
         start = 0,
@@ -1426,7 +1441,7 @@ class HiveQuerySuite extends HiveComparisonTest with 
SQLTestUtils with BeforeAnd
           sqlState = None,
           parameters = Map(
             "routineName" -> "`not_a_udf`",
-            "searchPath" -> "[`system`.`builtin`, `system`.`session`, 
`spark_catalog`.`a`]"),
+            "searchPath" -> "[`system`.`builtin`, `system`.`session`, 
`spark_catalog`.`default`]"),
           context = ExpectedContext(
             fragment = "not_a_udf()",
             start = 0,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-48864][SQL][TESTS] Refactor `HiveQuerySuite` and fix bug

Reply via email to