(sedona) branch master updated: [GH-2659] Fix file-based readers on Databricks by using reflection for DataSource.checkAndGlobPathIfNecessary (#2660)

jiayu Wed, 18 Feb 2026 14:45:26 -0800

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git



The following commit(s) were added to refs/heads/master by this push:
     new 59e836b48b [GH-2659] Fix file-based readers on Databricks by using 
reflection for DataSource.checkAndGlobPathIfNecessary (#2660)
59e836b48b is described below

commit 59e836b48bca9a3e31e1799175e8bdc9790b9bc8
Author: Jia Yu <[email protected]>
AuthorDate: Wed Feb 18 15:45:10 2026 -0700

    [GH-2659] Fix file-based readers on Databricks by using reflection for 
DataSource.checkAndGlobPathIfNecessary (#2660)
---
 .../datasources/SedonaFileIndexHelper.scala        | 59 +++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/SedonaFileIndexHelper.scala
 
b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/SedonaFileIndexHelper.scala
index 7971a1b4ba..7f670f64b4 100644
--- 
a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/SedonaFileIndexHelper.scala
+++ 
b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/SedonaFileIndexHelper.scala
@@ -18,6 +18,8 @@
  */
 package org.apache.spark.sql.execution.datasources
 
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -36,6 +38,61 @@ import scala.collection.JavaConverters._
  */
 object SedonaFileIndexHelper {
 
+  /**
+   * Cached reflective reference to [[DataSource.checkAndGlobPathIfNecessary]].
+   *
+   * <p>We call this method via reflection to avoid binary incompatibility between OSS Apache
+   * Spark and Databricks Runtime. On OSS Spark (3.5, 4.0, 4.1, etc.) this method has default
+   * parameter values, but on Databricks Runtime (both Spark 3.5 and 4.0) the same method has all
+   * required parameters with no defaults, and may also differ in parameter count.
+   *
+   * <p>Direct calls with named/default parameters make the Scala compiler emit calls to synthetic
+   * {@code $default$N()} accessor methods in the bytecode. When these accessors do not exist at
+   * runtime (as is the case on Databricks), a {@link NoSuchMethodError} is thrown. Reflection
+   * avoids this by resolving the method at runtime.
+   *
+   * <p>{@code Class.getMethods} returns methods in no particular order, so when more than one
+   * method carries this name the 6-parameter form is preferred, then the 5-parameter form, and
+   * only then any other match. This keeps the selection deterministic.
+   *
+   * @throws NoSuchMethodException
+   *   on first access if no public method with this name exists on the `DataSource` object
+   */
+  private lazy val checkAndGlobMethod: java.lang.reflect.Method = {
+    val candidates =
+      DataSource.getClass.getMethods.filter(_.getName == "checkAndGlobPathIfNecessary")
+    candidates
+      .find(_.getParameterCount == 6)
+      .orElse(candidates.find(_.getParameterCount == 5))
+      // Unknown arity: still return the match so the lookup itself behaves as before.
+      .orElse(candidates.headOption)
+      .getOrElse(
+        throw new NoSuchMethodException("DataSource.checkAndGlobPathIfNecessary not found"))
+  }
+
+  /**
+   * Invoke `DataSource.checkAndGlobPathIfNecessary` through the cached reflective handle,
+   * shaping the argument list to the parameter count of the method found at runtime.
+   *
+   * <p>All arguments are forwarded unchanged to the underlying method; Boolean values are boxed
+   * because reflection takes an `Array[AnyRef]`.
+   *
+   * @param paths
+   *   path strings handed to the underlying method
+   * @param hadoopConf
+   *   Hadoop configuration handed to the underlying method
+   * @param checkEmptyGlobPath
+   *   forwarded as the third argument
+   * @param checkFilesExist
+   *   forwarded as the fourth argument
+   * @param enableGlobbing
+   *   forwarded as the last argument
+   * @return
+   *   the value returned by the underlying method, cast to `Seq[Path]`
+   * @throws IllegalArgumentException
+   *   if the resolved method has neither 5 nor 6 parameters
+   */
+  private def checkAndGlobPathIfNecessary(
+      paths: Seq[String],
+      hadoopConf: Configuration,
+      checkEmptyGlobPath: Boolean,
+      checkFilesExist: Boolean,
+      enableGlobbing: Boolean): Seq[Path] = {
+    val method = checkAndGlobMethod
+    val emptyGlobArg = java.lang.Boolean.valueOf(checkEmptyGlobPath)
+    val filesExistArg = java.lang.Boolean.valueOf(checkFilesExist)
+    val globbingArg = java.lang.Boolean.valueOf(enableGlobbing)
+    val args: Array[AnyRef] = method.getParameterCount match {
+      case 6 =>
+        // OSS Apache Spark (3.x and 4.x): 6 parameters, numThreads is the 5th.
+        // NOTE(review): 40 presumably mirrors the default OSS Spark declares for numThreads;
+        // confirm against the targeted Spark versions.
+        Array(paths, hadoopConf, emptyGlobArg, filesExistArg, Integer.valueOf(40), globbingArg)
+      case 5 =>
+        // Databricks Runtime: 5 parameters (no numThreads)
+        Array(paths, hadoopConf, emptyGlobArg, filesExistArg, globbingArg)
+      case other =>
+        // Fail with a clear message instead of the opaque "wrong number of arguments" that
+        // Method.invoke would raise for an unrecognised signature.
+        throw new IllegalArgumentException(
+          "Unsupported DataSource.checkAndGlobPathIfNecessary signature: expected 5 or 6 " +
+            s"parameters but found $other")
+    }
+    try {
+      method.invoke(DataSource, args: _*).asInstanceOf[Seq[Path]]
+    } catch {
+      case e: java.lang.reflect.InvocationTargetException =>
+        // Rethrow the underlying failure so callers see the same exception a direct call would
+        // raise. A null cause would turn `throw` into a NullPointerException, so fall back to
+        // the wrapper itself.
+        throw Option(e.getCause).getOrElse(e)
+    }
+  }
+
   /**
    * Build an [[InMemoryFileIndex]] for the given paths, resolving globs if 
necessary, without the
    * streaming metadata directory check.
@@ -49,7 +106,7 @@ object SedonaFileIndexHelper {
     val hadoopConf = 
sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap)
     val globPathsEnabled =
       Option(options.get("globPaths")).map(v => 
java.lang.Boolean.parseBoolean(v)).getOrElse(true)
-    val rootPathsSpecified = DataSource.checkAndGlobPathIfNecessary(
+    val rootPathsSpecified = checkAndGlobPathIfNecessary(
       paths,
       hadoopConf,
       checkEmptyGlobPath = true,

(sedona) branch master updated: [GH-2659] Fix file-based readers on Databricks by using reflection for DataSource.checkAndGlobPathIfNecessary (#2660)

Reply via email to