This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch branch-1.9
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/branch-1.9 by this push:
     new 38d67dd2e [KYUUBI #6413] SPARK_HOME detection supports Spark 4
38d67dd2e is described below

commit 38d67dd2e117cd250cc45885f30339f770999933
Author: Cheng Pan <[email protected]>
AuthorDate: Thu May 23 14:26:38 2024 +0800

    [KYUUBI #6413] SPARK_HOME detection supports Spark 4
    
    # Description
    
    When `SPARK_HOME` is not set explicitly, the Kyuubi server can detect it from candidate directory names based on the Scala version, but the existing rules do not cover Spark 4.
    
    This PR enhances the `SPARK_HOME` detection logic so that it supports both Spark 3 and Spark 4.
    
    This detection logic is mainly used for testing purposes; the change does not affect users who configure `SPARK_HOME` in `kyuubi-env.sh`.
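    
    For illustration, below is a minimal, self-contained sketch of the new matching behavior. The object and method names are hypothetical; the regexes are the ones this patch introduces in `SparkProcessBuilder`.
    
    ```scala
    import scala.util.matching.Regex
    
    object SparkHomeFilterSketch {
      // Patterns introduced by this patch (copied from SparkProcessBuilder).
      val SPARK3_HOME_REGEX_SCALA_212: Regex =
        """^spark-3\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
      val SPARK3_HOME_REGEX_SCALA_213: Regex =
        """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r
      val SPARK4_HOME_REGEX_SCALA_213: Regex =
        """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r
    
      // For a Scala 2.13 build, a candidate directory is accepted if it matches
      // either the Spark 3 "-scala2.13" convention or the Spark 4 naming scheme.
      def matchesScala213(dirName: String): Boolean =
        Seq(SPARK3_HOME_REGEX_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
          .exists(_.findFirstMatchIn(dirName).isDefined)
    
      def main(args: Array[String]): Unit = {
        assert(matchesScala213("spark-3.5.0-bin-hadoop3-scala2.13"))
        assert(matchesScala213("spark-4.0.0-preview1-bin-hadoop3"))
        assert(!matchesScala213("spark-3.5.0-bin-hadoop3")) // a Scala 2.12 build
      }
    }
    ```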
    
    ## Types of changes
    
    - [ ] Bugfix (non-breaking change which fixes an issue)
    - [x] New feature (non-breaking change which adds functionality)
    - [ ] Breaking change (fix or feature that would cause existing functionality to change)
    
    ## Test Plan
    
    #### Related Unit Tests
    
    - `SparkProcessBuilderSuite`
    
    ---
    
    # Checklist 📝
    
    - [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html)
    
    **Be nice. Be informative.**
    
    Closes #6413 from pan3793/spark4-home.
    
    Closes #6413
    
    20e71fd7d [Cheng Pan] SPARK_HOME detection supports Spark 4
    
    Authored-by: Cheng Pan <[email protected]>
    Signed-off-by: Cheng Pan <[email protected]>
    (cherry picked from commit b89c185eec330143c204eb84ddb60f24624d1079)
    Signed-off-by: Cheng Pan <[email protected]>
---
 .../kyuubi/engine/spark/SparkProcessBuilder.scala  | 21 +++++-----
 .../engine/spark/SparkProcessBuilderSuite.scala    | 45 +++++++++++++---------
 2 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
index a651e99ef..fcf24b930 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
@@ -117,11 +117,11 @@ class SparkProcessBuilder(
   }
 
   override protected lazy val engineHomeDirFilter: FileFilter = file => {
-    val r = SCALA_COMPILE_VERSION match {
-      case "2.12" => SPARK_HOME_REGEX_SCALA_212
-      case "2.13" => SPARK_HOME_REGEX_SCALA_213
+    val patterns = SCALA_COMPILE_VERSION match {
+      case "2.12" => Seq(SPARK3_HOME_REGEX_SCALA_212)
+      case "2.13" => Seq(SPARK3_HOME_REGEX_SCALA_213, 
SPARK4_HOME_REGEX_SCALA_213)
     }
-    file.isDirectory && r.findFirstMatchIn(file.getName).isDefined
+    file.isDirectory && patterns.exists(_.findFirstMatchIn(file.getName).isDefined)
   }
 
   override protected[kyuubi] lazy val commands: Iterable[String] = {
@@ -364,11 +364,14 @@ object SparkProcessBuilder {
   final private val SPARK_CONF_FILE_NAME = "spark-defaults.conf"
 
   final private[kyuubi] val SPARK_CORE_SCALA_VERSION_REGEX =
-    """^spark-core_(\d\.\d+).*.jar$""".r
+    """^spark-core_(\d\.\d+)-.*\.jar$""".r
 
-  final private[kyuubi] val SPARK_HOME_REGEX_SCALA_212 =
-    """^spark-\d+\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
+  final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_212 =
+    """^spark-3\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
 
-  final private[kyuubi] val SPARK_HOME_REGEX_SCALA_213 =
-    """^spark-\d+\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala\d+(\.\d+)?$""".r
+  final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_213 =
+    """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r
+
+  final private[kyuubi] val SPARK4_HOME_REGEX_SCALA_213 =
+    """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r
 }
diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala
index 4ee98a080..5f3bae124 100644
--- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala
+++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala
@@ -403,36 +403,43 @@ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar {
       "spark-core_2.13-3.5.0-abc-20230921.jar",
       "spark-core_2.13-3.5.0-xyz-1.2.3.jar",
       "spark-core_2.13-3.5.0.1.jar",
-      "spark-core_2.13.2-3.5.0.jar").foreach { f =>
+      "spark-core_2.13-4.0.0-preview1.jar",
+      "spark-core_2.13-4.0.0.jar").foreach { f =>
       assertResult("2.13")(builder.extractSparkCoreScalaVersion(Seq(f)))
     }
 
     Seq(
       "spark-dummy_2.13-3.5.0.jar",
       "spark-core_2.13-3.5.0.1.zip",
-      "yummy-spark-core_2.13-3.5.0.jar").foreach { f =>
+      "yummy-spark-core_2.13-3.5.0.jar",
+      "spark-core_2.13.2-3.5.0.jar").foreach { f =>
       
assertThrows[KyuubiException](builder.extractSparkCoreScalaVersion(Seq(f)))
     }
   }
 
   test("match scala version of spark home") {
-    SCALA_COMPILE_VERSION match {
-      case "2.12" => Seq(
-          "spark-3.2.4-bin-hadoop3.2",
-          "spark-3.2.4-bin-hadoop2.7",
-          "spark-3.4.1-bin-hadoop3")
-          .foreach { sparkHome =>
-            assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212)
-            assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213)
-          }
-      case "2.13" => Seq(
-          "spark-3.2.4-bin-hadoop3.2-scala2.13",
-          "spark-3.4.1-bin-hadoop3-scala2.13",
-          "spark-3.5.0-bin-hadoop3-scala2.13")
-          .foreach { sparkHome =>
-            assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213)
-            assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212)
-          }
+    Seq(
+      "spark-3.2.4-bin-hadoop3.2",
+      "spark-3.2.4-bin-hadoop2.7",
+      "spark-3.4.1-bin-hadoop3").foreach { SPARK3_HOME_SCALA_212 =>
+      assertMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_212)
+      assertNotMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_213)
+      assertNotMatches(SPARK3_HOME_SCALA_212, SPARK4_HOME_REGEX_SCALA_213)
+    }
+    Seq(
+      "spark-3.2.4-bin-hadoop3.2-scala2.13",
+      "spark-3.4.1-bin-hadoop3-scala2.13",
+      "spark-3.5.0-bin-hadoop3-scala2.13").foreach { SPARK3_HOME_SCALA_213 =>
+      assertMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
+      assertNotMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
+      assertNotMatches(SPARK3_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
+    }
+    Seq(
+      "spark-4.0.0-preview1-bin-hadoop3",
+      "spark-4.0.0-bin-hadoop3").foreach { SPARK4_HOME_SCALA_213 =>
+      assertMatches(SPARK4_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
+      assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
+      assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
     }
   }
 

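A side note on the tightened `SPARK_CORE_SCALA_VERSION_REGEX`: the new pattern requires a literal "-" right after the captured Scala version and an escaped ".jar" suffix, so a name like "spark-core_2.13.2-3.5.0.jar", which the old pattern accepted as Scala "2.13", is now rejected. Below is a minimal sketch of that rule, using a hypothetical `extractScalaVersion` helper in place of `builder.extractSparkCoreScalaVersion`:

```scala
import scala.util.matching.Regex

object ScalaVersionFromJarSketch {
  // Tightened pattern from this patch: "-" must follow the captured Scala
  // version, and ".jar" is matched literally.
  val SPARK_CORE_SCALA_VERSION_REGEX: Regex =
    """^spark-core_(\d\.\d+)-.*\.jar$""".r

  // Returns the captured Scala version, or None if the jar name doesn't match.
  def extractScalaVersion(jarName: String): Option[String] =
    SPARK_CORE_SCALA_VERSION_REGEX.findFirstMatchIn(jarName).map(_.group(1))

  def main(args: Array[String]): Unit = {
    assert(extractScalaVersion("spark-core_2.13-4.0.0-preview1.jar").contains("2.13"))
    assert(extractScalaVersion("spark-core_2.13.2-3.5.0.jar").isEmpty) // now rejected
  }
}
```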