This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a380380428db [SPARK-52265][SQL][TEST] Fix regex leading to empty 
PROCESS_TABLES.testingVersions in HiveExternalCatalogVersionsSuite
a380380428db is described below

commit a380380428dbfcd03d7d76b876b33b8534caa4c8
Author: Emilie Faracci <[email protected]>
AuthorDate: Sat Jun 7 00:34:40 2025 +0800

    [SPARK-52265][SQL][TEST] Fix regex leading to empty 
PROCESS_TABLES.testingVersions in HiveExternalCatalogVersionsSuite
    
    ### What changes were proposed in this pull request?
    
    Fix the version parsing logic in `HiveExternalCatalogVersionsSuite` to 
properly handle new artifact paths in 
https://dist.apache.org/repos/dist/release/spark/ so that "backward 
compatibility" test can be run.
    
    This change creates a constant `val SparkVersionPattern = """<a 
href="spark-(\d.\d.\d)/">""".r` for more precise version matching, and removes 
redundant `.filterNot(_.contains("preview"))` which is no longer needed.
    
    ### Why are the changes needed?
    
    The suite is failing to execute the "backward compatibility" test due to 
parsing errors with testing versions. The current implementation fails to parse 
versions when encountering new paths like `spark-connect-swift-0.1.0/` and 
`spark-kubernetes-operator-0.1.0/` in 
https://dist.apache.org/repos/dist/release/spark/.
    
    This leads to `PROCESS_TABLES.testingVersions` being empty, and in turn a 
logError: "Exception encountered when invoking run on a nested suite - Fail to 
get the latest Spark versions to test". As a result, the condition is not met 
to run the test.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    
    Executed local build and test for `HiveExternalCatalogVersionsSuite`:
    
    `build/mvn -pl sql/hive -Dtest=none 
-DwildcardSuites=org.apache.spark.sql.hive.HiveExternalCatalogVersionsSuite 
test-compile scalatest:test`
    
    Verified that the reported error no longer appears, "backward 
compatibility" test runs successfully, and `PROCESS_TABLES.testingVersions` now 
correctly contains "3.5.5" when printed out, which was previously empty.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #50989 from efaracci018/fix-testingVersions.
    
    Lead-authored-by: Emilie Faracci <[email protected]>
    Co-authored-by: efaracci018 <[email protected]>
    Signed-off-by: yangjie01 <[email protected]>
---
 .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala   | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
index 684e4c53e4ef..32f2c6d508d3 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala
@@ -96,7 +96,12 @@ class HiveExternalCatalogVersionsSuite extends 
SparkSubmitTestUtils {
      mirrors.distinct :+ "https://archive.apache.org/dist" :+ 
PROCESS_TABLES.releaseMirror
     logInfo(s"Trying to download Spark $version from $sites")
     for (site <- sites) {
-      val filename = s"spark-$version-bin-hadoop3-scala2.13.tgz"
+      val scalaVersion = version match {
+        case v if v.startsWith("3.") => "-scala2.13"
+        case v if v.startsWith("4.") => ""
+        case _ => fail(s"Spark version $version is unexpected")
+      }
+      val filename = s"spark-$version-bin-hadoop3$scalaVersion.tgz"
       val url = s"$site/spark/spark-$version/$filename"
       logInfo(s"Downloading Spark $version from $url")
       try {
@@ -262,13 +267,13 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils 
{
   val testingVersions: Seq[String] = if (isPythonVersionAvailable &&
       SystemUtils.isJavaVersionAtMost(JavaVersion.JAVA_17)) {
     import scala.io.Source
+    val sparkVersionPattern = """<a href="spark-(\d.\d.\d)/">""".r
     try Utils.tryWithResource(
       Source.fromURL(s"$releaseMirror/spark")) { source =>
       source.mkString
         .split("\n")
-        .filter(_.contains("""<a href="spark-"""))
-        .filterNot(_.contains("preview"))
-        .map("""<a 
href="spark-(\d.\d.\d)/">""".r.findFirstMatchIn(_).get.group(1))
+        .filter(sparkVersionPattern.unanchored.matches(_))
+        .map(sparkVersionPattern.findFirstMatchIn(_).get.group(1))
         .filter(_ < org.apache.spark.SPARK_VERSION)
         .filterNot(skipReleaseVersions.contains).toImmutableArraySeq
     } catch {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to