This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch branch-0.x
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/branch-0.x by this push:
new 284b95420c69 feat: Partition predicate fix for Databricks runtime support (#18257)
284b95420c69 is described below
commit 284b95420c692615e933ba49cc2c0f68e175e747
Author: Y Ethan Guo <[email protected]>
AuthorDate: Fri Feb 27 13:45:21 2026 -0800
feat: Partition predicate fix for Databricks runtime support (#18257)
---
.../main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala | 4 ++--
.../scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala | 10 +++++++++-
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
index 306e5aa457be..9da90852a4e4 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
@@ -40,7 +40,7 @@ import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BoundReference, EmptyRow, EqualTo, Expression, InterpretedPredicate, Literal}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BoundReference, EmptyRow, EqualTo, Expression, Literal}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.catalyst.{InternalRow, expressions}
import org.apache.spark.sql.execution.datasources.{FileStatusCache, NoopCache}
@@ -246,7 +246,7 @@ class SparkHoodieTableFileIndex(spark: SparkSession,
// the whole table
if (haveProperPartitionValues(partitionPaths.toSeq) &&
partitionSchema.nonEmpty) {
val predicate = partitionPruningPredicates.reduce(expressions.And)
- val boundPredicate = InterpretedPredicate(predicate.transform {
+ val boundPredicate = sparkAdapter.createInterpretedPredicate(predicate.transform {
case a: AttributeReference =>
val index = partitionSchema.indexWhere(a.name == _.name)
BoundReference(index, partitionSchema(index).dataType, nullable = true)
diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala
index 44ae9a5b49cc..21691407b0c7 100644
--- a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala
+++ b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala
@@ -78,7 +78,15 @@ abstract class BaseSpark3Adapter extends SparkAdapter with Logging {
}
override def createInterpretedPredicate(e: Expression): InterpretedPredicate = {
- Predicate.createInterpreted(e)
+ try {
+ Predicate.createInterpreted(e)
+ } catch {
+ case _: NoSuchMethodException | _: NoSuchMethodError | _: IllegalArgumentException =>
+ // Fallback: certain Spark runtimes (e.g. Databricks) use a 2-arg constructor
+ val clazz = classOf[InterpretedPredicate]
+ val ctor = clazz.getConstructor(classOf[Expression], classOf[Boolean])
+ ctor.newInstance(e, java.lang.Boolean.FALSE)
+ }
}
override def createRelation(sqlContext: SQLContext,