This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a3266b411723 [SPARK-46695][SQL][HIVE] Always setting
hive.execution.engine to mr
a3266b411723 is described below
commit a3266b411723310ec10fc1843ddababc15249db0
Author: Cheng Pan <[email protected]>
AuthorDate: Fri Jan 12 03:10:17 2024 -0800
[SPARK-46695][SQL][HIVE] Always setting hive.execution.engine to mr
### What changes were proposed in this pull request?
Previously, we only set `hive.execution.engine` to "mr" when the original
value was "tez". This PR aims to always set `hive.execution.engine` to "mr".
### Why are the changes needed?
Apache Hive supports three execution engines: MR, Spark, and Tez. As with
"tez", setting `hive.execution.engine` to "spark" may also initialize some
unexpected components.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
```
SPARK_TEST_HIVE_CLIENT_VERSIONS=2.3,3.1 \
build/sbt -Phive 'hive/testOnly *HiveClientSuites -- -z "override useless and side-effect hive configurations"'
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44704 from pan3793/SPARK-46695.
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/hive/client/HiveClientImpl.scala | 9 +++++----
.../spark/sql/hive/client/HiveClientSuites.scala | 22 ++++++++++++----------
2 files changed, 17 insertions(+), 14 deletions(-)
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 72388a8d4b98..946e698dc6ab 100644
---
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1318,10 +1318,11 @@ private[hive] object HiveClientImpl extends Logging {
" false to disable useless hive logic")
hiveConf.setBoolean("hive.session.history.enabled", false)
}
- // If this is tez engine, SessionState.start might bring extra logic to
initialize tez stuff,
- // which is useless for spark.
- if (hiveConf.get("hive.execution.engine") == "tez") {
- logWarning("Detected HiveConf hive.execution.engine is 'tez' and will be
reset to 'mr'" +
+ // If this is non-mr engine, e.g. spark, tez, SessionState.start might
bring extra logic to
+ // initialize spark or tez stuff, which is useless for spark.
+ val engine = hiveConf.get("hive.execution.engine")
+ if (engine != "mr") {
+ logWarning(s"Detected HiveConf hive.execution.engine is '$engine' and
will be reset to 'mr'" +
" to disable useless hive logic")
hiveConf.set("hive.execution.engine", "mr", SOURCE_SPARK)
}
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala
index 015078f269f9..e0d5236e1e01 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala
@@ -57,16 +57,18 @@ class HiveClientSuites extends SparkFunSuite with
HiveClientVersions {
assert("success" === client.getConf("test", null))
}
- test("override useless and side-effect hive configurations ") {
- val hadoopConf = new Configuration()
- // These hive flags should be reset by spark
- hadoopConf.setBoolean("hive.cbo.enable", true)
- hadoopConf.setBoolean("hive.session.history.enabled", true)
- hadoopConf.set("hive.execution.engine", "tez")
- val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf)
- assert(!client.getConf("hive.cbo.enable", "true").toBoolean)
- assert(!client.getConf("hive.session.history.enabled", "true").toBoolean)
- assert(client.getConf("hive.execution.engine", "tez") === "mr")
+ test("override useless and side-effect hive configurations") {
+ Seq("spark", "tez").foreach { hiveExecEngine =>
+ val hadoopConf = new Configuration()
+ // These hive flags should be reset by spark
+ hadoopConf.setBoolean("hive.cbo.enable", true)
+ hadoopConf.setBoolean("hive.session.history.enabled", true)
+ hadoopConf.set("hive.execution.engine", hiveExecEngine)
+ val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf)
+ assert(!client.getConf("hive.cbo.enable", "true").toBoolean)
+ assert(!client.getConf("hive.session.history.enabled", "true").toBoolean)
+ assert(client.getConf("hive.execution.engine", hiveExecEngine) === "mr")
+ }
}
private def getNestedMessages(e: Throwable): String = {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]