This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch branch-1.9
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/branch-1.9 by this push:
new 38d67dd2e [KYUUBI #6413] SPARK_HOME detection supports Spark 4
38d67dd2e is described below
commit 38d67dd2e117cd250cc45885f30339f770999933
Author: Cheng Pan <[email protected]>
AuthorDate: Thu May 23 14:26:38 2024 +0800
[KYUUBI #6413] SPARK_HOME detection supports Spark 4
# Description
When `SPARK_HOME` is not set explicitly, the Kyuubi server supports
detecting it based on Scala versions, but those rules are not applicable to
Spark 4.
This PR enhances the SPARK_HOME detection logic to make it support both
Spark 3 and Spark 4.
The above logic is mainly used for testing purposes; the change does not
affect users who configure `SPARK_HOME` in `kyuubi-env.sh`.
## Types of changes
- [ ] Bugfix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
## Test Plan
#### Related Unit Tests
- `SparkProcessBuilderSuite`
---
# Checklist 📝
- [x] This patch was not authored or co-authored using [Generative
Tooling](https://www.apache.org/legal/generative-tooling.html)
**Be nice. Be informative.**
Closes #6413 from pan3793/spark4-home.
Closes #6413
20e71fd7d [Cheng Pan] SPARK_HOME detection supports Spark 4
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Cheng Pan <[email protected]>
(cherry picked from commit b89c185eec330143c204eb84ddb60f24624d1079)
Signed-off-by: Cheng Pan <[email protected]>
---
.../kyuubi/engine/spark/SparkProcessBuilder.scala | 21 +++++-----
.../engine/spark/SparkProcessBuilderSuite.scala | 45 +++++++++++++---------
2 files changed, 38 insertions(+), 28 deletions(-)
diff --git
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
index a651e99ef..fcf24b930 100644
---
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
+++
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
@@ -117,11 +117,11 @@ class SparkProcessBuilder(
}
override protected lazy val engineHomeDirFilter: FileFilter = file => {
- val r = SCALA_COMPILE_VERSION match {
- case "2.12" => SPARK_HOME_REGEX_SCALA_212
- case "2.13" => SPARK_HOME_REGEX_SCALA_213
+ val patterns = SCALA_COMPILE_VERSION match {
+ case "2.12" => Seq(SPARK3_HOME_REGEX_SCALA_212)
+ case "2.13" => Seq(SPARK3_HOME_REGEX_SCALA_213,
SPARK4_HOME_REGEX_SCALA_213)
}
- file.isDirectory && r.findFirstMatchIn(file.getName).isDefined
+ file.isDirectory &&
patterns.exists(_.findFirstMatchIn(file.getName).isDefined)
}
override protected[kyuubi] lazy val commands: Iterable[String] = {
@@ -364,11 +364,14 @@ object SparkProcessBuilder {
final private val SPARK_CONF_FILE_NAME = "spark-defaults.conf"
final private[kyuubi] val SPARK_CORE_SCALA_VERSION_REGEX =
- """^spark-core_(\d\.\d+).*.jar$""".r
+ """^spark-core_(\d\.\d+)-.*\.jar$""".r
- final private[kyuubi] val SPARK_HOME_REGEX_SCALA_212 =
- """^spark-\d+\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
+ final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_212 =
+ """^spark-3\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
- final private[kyuubi] val SPARK_HOME_REGEX_SCALA_213 =
- """^spark-\d+\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala\d+(\.\d+)?$""".r
+ final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_213 =
+ """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r
+
+ final private[kyuubi] val SPARK4_HOME_REGEX_SCALA_213 =
+ """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r
}
diff --git
a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala
b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala
index 4ee98a080..5f3bae124 100644
---
a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala
+++
b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala
@@ -403,36 +403,43 @@ class SparkProcessBuilderSuite extends
KerberizedTestHelper with MockitoSugar {
"spark-core_2.13-3.5.0-abc-20230921.jar",
"spark-core_2.13-3.5.0-xyz-1.2.3.jar",
"spark-core_2.13-3.5.0.1.jar",
- "spark-core_2.13.2-3.5.0.jar").foreach { f =>
+ "spark-core_2.13-4.0.0-preview1.jar",
+ "spark-core_2.13-4.0.0.jar").foreach { f =>
assertResult("2.13")(builder.extractSparkCoreScalaVersion(Seq(f)))
}
Seq(
"spark-dummy_2.13-3.5.0.jar",
"spark-core_2.13-3.5.0.1.zip",
- "yummy-spark-core_2.13-3.5.0.jar").foreach { f =>
+ "yummy-spark-core_2.13-3.5.0.jar",
+ "spark-core_2.13.2-3.5.0.jar").foreach { f =>
assertThrows[KyuubiException](builder.extractSparkCoreScalaVersion(Seq(f)))
}
}
test("match scala version of spark home") {
- SCALA_COMPILE_VERSION match {
- case "2.12" => Seq(
- "spark-3.2.4-bin-hadoop3.2",
- "spark-3.2.4-bin-hadoop2.7",
- "spark-3.4.1-bin-hadoop3")
- .foreach { sparkHome =>
- assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212)
- assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213)
- }
- case "2.13" => Seq(
- "spark-3.2.4-bin-hadoop3.2-scala2.13",
- "spark-3.4.1-bin-hadoop3-scala2.13",
- "spark-3.5.0-bin-hadoop3-scala2.13")
- .foreach { sparkHome =>
- assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213)
- assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212)
- }
+ Seq(
+ "spark-3.2.4-bin-hadoop3.2",
+ "spark-3.2.4-bin-hadoop2.7",
+ "spark-3.4.1-bin-hadoop3").foreach { SPARK3_HOME_SCALA_212 =>
+ assertMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_212)
+ assertNotMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_213)
+ assertNotMatches(SPARK3_HOME_SCALA_212, SPARK4_HOME_REGEX_SCALA_213)
+ }
+ Seq(
+ "spark-3.2.4-bin-hadoop3.2-scala2.13",
+ "spark-3.4.1-bin-hadoop3-scala2.13",
+ "spark-3.5.0-bin-hadoop3-scala2.13").foreach { SPARK3_HOME_SCALA_213 =>
+ assertMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
+ assertNotMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
+ assertNotMatches(SPARK3_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
+ }
+ Seq(
+ "spark-4.0.0-preview1-bin-hadoop3",
+ "spark-4.0.0-bin-hadoop3").foreach { SPARK4_HOME_SCALA_213 =>
+ assertMatches(SPARK4_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
+ assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
+ assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
}
}