yihua commented on code in PR #12772:
URL: https://github.com/apache/hudi/pull/12772#discussion_r2365707917
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieDataSourceHelper.scala:
##########
@@ -18,13 +18,16 @@
package org.apache.hudi
-import org.apache.hudi.testutils.HoodieClientTestBase
+import org.apache.hudi.testutils.{DisabledOnSpark4, HoodieClientTestBase}
import org.apache.spark.sql.functions.expr
import org.apache.spark.sql.sources.Filter
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
+@DisabledOnSpark4
Review Comment:
To revisit
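
For context, a minimal sketch (not the actual `DisabledOnSpark4` implementation in `org.apache.hudi.testutils`) of how the same skip could be expressed with JUnit 5's built-in `@DisabledIf`, assuming `HoodieSparkUtils.gteqSpark4_0` is the version check used elsewhere in this PR; the test class and method names are hypothetical:

```scala
import org.apache.hudi.HoodieSparkUtils
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.condition.DisabledIf

// Hypothetical test class, for illustration only.
class ExampleSparkVersionGatedTest {

  // Condition method referenced by @DisabledIf below; returning true disables the test.
  def isSpark4: Boolean = HoodieSparkUtils.gteqSpark4_0

  @Test
  @DisabledIf("isSpark4") // reported as disabled when running against Spark 4.x
  def roundTripOnSpark3(): Unit = {
    // assertions that are only expected to hold on Spark 3.x would go here
  }
}
```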
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionBucketIndexSupport.scala:
##########
@@ -172,30 +172,32 @@ class TestPartitionBucketIndexSupport extends TestBucketIndexSupport {
  def exprFilePathAnswerCheck(bucketIndexSupport: PartitionBucketIndexSupport,
                              exprRaw: String, expectResult: Set[String],
                              allFileStatus: Set[String]): Unit = {
-    val resolveExpr = HoodieCatalystExpressionUtils.resolveExpr(spark, exprRaw, structSchema)
-    val optimizerPlan = spark.sessionState.optimizer.execute(DummyExpressionHolder(Seq(resolveExpr)))
-    val optimizerExpr = optimizerPlan.asInstanceOf[DummyExpressionHolder].exprs.head
-
-    // split input files into different partitions
-    val partitionPath1 = DEFAULT_PARTITION_PATH(0)
-    val allFileSlices1: Seq[FileSlice] = allFileStatus.slice(0, 3).map(fileName => {
-      val slice = new FileSlice(partitionPath1, "00000000000000000", FSUtils.getFileId(fileName))
-      slice.setBaseFile(new HoodieBaseFile(new StoragePathInfo(new StoragePath(fileName), 0L, false, 0, 0, 0)))
-      slice
-    }).toSeq
-
-    val partitionPath2 = DEFAULT_PARTITION_PATH(1)
-    val allFileSlices2: Seq[FileSlice] = allFileStatus.slice(3, 5).map(fileName => {
-      val slice = new FileSlice(partitionPath1, "00000000000000000", FSUtils.getFileId(fileName))
-      slice.setBaseFile(new HoodieBaseFile(new StoragePathInfo(new StoragePath(fileName), 0L, false, 0, 0, 0)))
-      slice
-    }).toSeq
-
-    val input = Seq((Option.apply(new BaseHoodieTableFileIndex.PartitionPath(partitionPath1, Array())), allFileSlices1),
-      (Option.apply(new BaseHoodieTableFileIndex.PartitionPath(partitionPath2, Array())), allFileSlices2))
-    val candidate = bucketIndexSupport.computeCandidateFileNames(fileIndex, splitConjunctivePredicates(optimizerExpr),
-      Seq(), input, false)
-
-    assert(candidate.get.equals(expectResult))
+    if (!HoodieSparkUtils.gteqSpark4_0) { // TODO (HUDI-9403)
Review Comment:
To revisit
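
One possible direction for the revisit, sketched under the assumption that an explicit skip is preferred over a silent guard (this is not what the PR currently does): replace the `if` wrapper with a JUnit 5 assumption at the top of the check, so the Spark 4 skip shows up as an aborted test rather than a quietly bypassed assertion:

```scala
import org.apache.hudi.HoodieSparkUtils
import org.junit.jupiter.api.Assumptions.assumeFalse

// Abort the check on Spark 4.x instead of silently skipping it; the ticket
// reference mirrors the TODO in the diff above.
assumeFalse(HoodieSparkUtils.gteqSpark4_0, "Skipped on Spark 4.x until HUDI-9403 is resolved")
```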
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSecondaryIndexDataTypes.scala:
##########
@@ -156,13 +157,13 @@ class TestSecondaryIndexDataTypes extends HoodieSparkSqlTestBase {
       """.stripMargin)
    // Define supported columns with multiple test values for comprehensive validation
    // Filter based on values of string, long, double, int. Spark will take care of the type cast.
- val supportedColumns = Seq(
+ val supportedColumns = Seq(
Review Comment:
nit: fix indentation
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala:
##########
@@ -95,7 +89,7 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with
    // Stats for the reads fetching only _projected_ columns (note how amount of bytes read
    // increases along w/ the # of columns)
val projectedColumnsReadStats: Array[(String, Long)] =
- if (HoodieSparkUtils.isSpark3)
+ if (HoodieSparkUtils.gteqSpark3_3_2)
Review Comment:
We could remove all these checks, given that the minimum supported Spark version is now 3.3.
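
To illustrate, a sketch of what dropping the guard could look like; `spark33PlusStats` and `legacyStats` are hypothetical placeholders for the two expectation arrays the test currently switches between, not the real values from `TestParquetColumnProjection`:

```scala
import org.apache.hudi.HoodieSparkUtils

// Hypothetical placeholders standing in for the test's real expectation arrays.
val spark33PlusStats: Array[(String, Long)] = Array.empty
val legacyStats: Array[(String, Long)] = Array.empty

// Current shape (per the diff above): branch on the Spark runtime version.
val projectedColumnsReadStatsBefore: Array[(String, Long)] =
  if (HoodieSparkUtils.gteqSpark3_3_2) spark33PlusStats else legacyStats

// Suggested shape: with Spark 3.3 as the minimum supported version, the guard
// is redundant and only the Spark 3.3.2+ expectations need to remain.
val projectedColumnsReadStatsAfter: Array[(String, Long)] = spark33PlusStats
```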
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]