This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a3266b411723 [SPARK-46695][SQL][HIVE] Always setting 
hive.execution.engine to mr
a3266b411723 is described below

commit a3266b411723310ec10fc1843ddababc15249db0
Author: Cheng Pan <[email protected]>
AuthorDate: Fri Jan 12 03:10:17 2024 -0800

    [SPARK-46695][SQL][HIVE] Always setting hive.execution.engine to mr
    
    ### What changes were proposed in this pull request?
    
    Previously, we only set `hive.execution.engine` to "mr" when the original 
value was "tez". This PR always sets `hive.execution.engine` to "mr".
    
    ### Why are the changes needed?
    
    Apache Hive supports three execution engines: MR, Spark, and Tez. As with 
"tez", setting `hive.execution.engine` to "spark" may also initialize some 
unexpected components.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    ```
    SPARK_TEST_HIVE_CLIENT_VERSIONS=2.3,3.1 \
    build/sbt -Phive 'hive/testOnly *HiveClientSuites -- -z "override useless 
and side-effect hive configurations"'
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #44704 from pan3793/SPARK-46695.
    
    Authored-by: Cheng Pan <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../spark/sql/hive/client/HiveClientImpl.scala     |  9 +++++----
 .../spark/sql/hive/client/HiveClientSuites.scala   | 22 ++++++++++++----------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 72388a8d4b98..946e698dc6ab 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1318,10 +1318,11 @@ private[hive] object HiveClientImpl extends Logging {
         " false to disable useless hive logic")
       hiveConf.setBoolean("hive.session.history.enabled", false)
     }
-    // If this is tez engine, SessionState.start might bring extra logic to 
initialize tez stuff,
-    // which is useless for spark.
-    if (hiveConf.get("hive.execution.engine") == "tez") {
-      logWarning("Detected HiveConf hive.execution.engine is 'tez' and will be 
reset to 'mr'" +
+    // If this is non-mr engine, e.g. spark, tez, SessionState.start might 
bring extra logic to
+    // initialize spark or tez stuff, which is useless for spark.
+    val engine = hiveConf.get("hive.execution.engine")
+    if (engine != "mr") {
+      logWarning(s"Detected HiveConf hive.execution.engine is '$engine' and 
will be reset to 'mr'" +
         " to disable useless hive logic")
       hiveConf.set("hive.execution.engine", "mr", SOURCE_SPARK)
     }
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala
index 015078f269f9..e0d5236e1e01 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala
@@ -57,16 +57,18 @@ class HiveClientSuites extends SparkFunSuite with 
HiveClientVersions {
     assert("success" === client.getConf("test", null))
   }
 
-  test("override useless and side-effect hive configurations ") {
-    val hadoopConf = new Configuration()
-    // These hive flags should be reset by spark
-    hadoopConf.setBoolean("hive.cbo.enable", true)
-    hadoopConf.setBoolean("hive.session.history.enabled", true)
-    hadoopConf.set("hive.execution.engine", "tez")
-    val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf)
-    assert(!client.getConf("hive.cbo.enable", "true").toBoolean)
-    assert(!client.getConf("hive.session.history.enabled", "true").toBoolean)
-    assert(client.getConf("hive.execution.engine", "tez") === "mr")
+  test("override useless and side-effect hive configurations") {
+    Seq("spark", "tez").foreach { hiveExecEngine =>
+      val hadoopConf = new Configuration()
+      // These hive flags should be reset by spark
+      hadoopConf.setBoolean("hive.cbo.enable", true)
+      hadoopConf.setBoolean("hive.session.history.enabled", true)
+      hadoopConf.set("hive.execution.engine", hiveExecEngine)
+      val client = buildClient(HiveUtils.builtinHiveVersion, hadoopConf)
+      assert(!client.getConf("hive.cbo.enable", "true").toBoolean)
+      assert(!client.getConf("hive.session.history.enabled", "true").toBoolean)
+      assert(client.getConf("hive.execution.engine", hiveExecEngine) === "mr")
+    }
   }
 
   private def getNestedMessages(e: Throwable): String = {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to