This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 98058da21e8a [SPARK-54580][SQL] Consider Hive 4.1 in HiveVersionSuite
and HiveClientImpl
98058da21e8a is described below
commit 98058da21e8a341eca10207f0ca458671220ca94
Author: Kousuke Saruta <[email protected]>
AuthorDate: Sat Dec 6 15:49:19 2025 -0800
[SPARK-54580][SQL] Consider Hive 4.1 in HiveVersionSuite and HiveClientImpl
### What changes were proposed in this pull request?
This PR changes `HiveVersionSuite.scala` and `HiveClientImpl.scala` to
consider Hive 4.1.
#51809 added Hive 4.1 metastore support but Hive 4.1 is not considered in
these files.
### Why are the changes needed?
To ensure Spark works with Hive 4.1 metastore.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added new test which fails without this change.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #53286 from sarutak/hive-client-versinons-issue.
Authored-by: Kousuke Saruta <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +-
.../scala/org/apache/spark/sql/hive/client/package.scala | 15 +++++++++++++--
.../apache/spark/sql/hive/client/HiveClientSuite.scala | 10 ++++++++++
.../apache/spark/sql/hive/client/HiveVersionSuite.scala | 3 ++-
4 files changed, 26 insertions(+), 4 deletions(-)
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index f1fe95b8a318..be46e54d18ec 100644
---
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -875,7 +875,7 @@ private[hive] class HiveClientImpl(
// Since HIVE-18238(Hive 3.0.0), the Driver.close function's return type
changed
// and the CommandProcessorFactory.clean function removed.
driver.getClass.getMethod("close").invoke(driver)
- if (version != hive.v3_0 && version != hive.v3_1 && version !=
hive.v4_0) {
+ if (version < hive.v3_0) {
CommandProcessorFactory.clean(conf)
}
}
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
index d7a0c58b4016..24ccbc7cbac4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.hive
+import org.apache.spark.util.Utils
+
/** Support for interacting with different versions of the HiveMetastoreClient
*/
package object client {
private[hive] sealed abstract class HiveVersion(
@@ -115,8 +117,17 @@ package object client {
exclusions =
"org.apache.curator:*" ::
"org.apache.hive:hive-service-rpc" ::
- "org.apache.tez:tez-api" ::
- "org.apache.zookeeper:zookeeper" :: Nil)
+ "org.apache.zookeeper:zookeeper" :: Nil ++
+ {
+ if (!Utils.isTesting) {
+ // HiveClientImpl#runHive which is used for testing refers
+ // `org.apache.hadoop.hive.ql.DriverContext` indirectly and
`DriverContext` refers
+ // Tez APIs.
+ Seq("org.apache.tez:tez-api")
+ } else {
+ Seq.empty
+ }
+ })
val allSupportedHiveVersions: Set[HiveVersion] =
Set(v2_0, v2_1, v2_2, v2_3, v3_0, v3_1, v4_0, v4_1)
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
index 585b8a92c8be..7db9632c87b9 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
@@ -609,6 +609,16 @@ class HiveClientSuite(version: String) extends
HiveVersionSuite(version) {
}
}
+ test("read table written by Hive") {
+ // Hive 3.0 and 3.1 don't work with JDK 11+ (HIVE-22097)
+ if (ver != hive.v3_0 && ver != hive.v3_1) {
+ withTable("test_tbl") {
+ client.runSqlHive("CREATE TABLE test_tbl AS SELECT 1")
+ assert(versionSpark.sql("SELECT * from test_tbl").collect() ===
Array(Row(1)))
+ }
+ }
+ }
+
///////////////////////////////////////////////////////////////////////////
// Miscellaneous API
///////////////////////////////////////////////////////////////////////////
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala
index 1a45f6b15096..9866ce58775e 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala
@@ -35,7 +35,8 @@ private[client] abstract class HiveVersionSuite(version:
String) extends SparkFu
hadoopConf.set("datanucleus.autoStartMechanismMode", "ignored")
hadoopConf.set("hive.metastore.schema.verification", "false")
// Since Hive 3.0, HIVE-19310 skipped `ensureDbInit` if
`hive.in.test=false`.
- if (version == "3.0" || version == "3.1" || version == "4.0") {
+ val ver = IsolatedClientLoader.hiveVersion(version)
+ if (hive.v3_0 <= ver) {
hadoopConf.set("hive.in.test", "true")
hadoopConf.set("hive.query.reexecution.enabled", "false")
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]