wombatu-kun commented on code in PR #12772:
URL: https://github.com/apache/hudi/pull/12772#discussion_r2084182524
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionBucketIndexSupport.scala:
##########
@@ -172,30 +172,39 @@ class TestPartitionBucketIndexSupport extends
TestBucketIndexSupport {
def exprFilePathAnswerCheck(bucketIndexSupport: PartitionBucketIndexSupport,
exprRaw: String, expectResult: Set[String],
allFileStatus: Set[String]): Unit = {
- val resolveExpr = HoodieCatalystExpressionUtils.resolveExpr(spark,
exprRaw, structSchema)
- val optimizerPlan =
spark.sessionState.optimizer.execute(DummyExpressionHolder(Seq(resolveExpr)))
- val optimizerExpr =
optimizerPlan.asInstanceOf[DummyExpressionHolder].exprs.head
-
- // split input files into different partitions
- val partitionPath1 = DEFAULT_PARTITION_PATH(0)
- val allFileSlices1: Seq[FileSlice] = allFileStatus.slice(0,
3).map(fileName => {
- val slice = new FileSlice(partitionPath1, "00000000000000000",
FSUtils.getFileId(fileName))
- slice.setBaseFile(new HoodieBaseFile(new StoragePathInfo(new
StoragePath(fileName), 0L, false, 0, 0, 0)))
- slice
- }).toSeq
-
- val partitionPath2 = DEFAULT_PARTITION_PATH(1)
- val allFileSlices2: Seq[FileSlice] = allFileStatus.slice(3,
5).map(fileName => {
- val slice = new FileSlice(partitionPath1, "00000000000000000",
FSUtils.getFileId(fileName))
- slice.setBaseFile(new HoodieBaseFile(new StoragePathInfo(new
StoragePath(fileName), 0L, false, 0, 0, 0)))
- slice
- }).toSeq
-
- val input = Seq((Option.apply(new
BaseHoodieTableFileIndex.PartitionPath(partitionPath1, Array())),
allFileSlices1),
- (Option.apply(new BaseHoodieTableFileIndex.PartitionPath(partitionPath2,
Array())), allFileSlices2))
- val candidate = bucketIndexSupport.computeCandidateFileNames(fileIndex,
splitConjunctivePredicates(optimizerExpr),
- Seq(), input, false)
-
- assert(candidate.get.equals(expectResult))
+ // On Spark 4, this test failed with org.apache.spark.SparkException:
+ // [PLAN_VALIDATION_FAILED_RULE_EXECUTOR] The input plan of
org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2 is invalid:
Aliases A#2143653L are dangling in the references for plan:
+ // DummyExpressionHolder [(A#2143653L = cast(3 as bigint))]
+ //
+ // Previous schema:
+ // Previous plan: DummyExpressionHolder [(A#2143653L = cast(3 as bigint))]
+ // SQLSTATE: XXKD0
+ if (!HoodieSparkUtils.gteqSpark4_0) { // TODO: fix later (tracked by HUDI-9403)
Review Comment:
https://issues.apache.org/jira/browse/HUDI-9403
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]