yihua commented on code in PR #12772:
URL: https://github.com/apache/hudi/pull/12772#discussion_r2076266780


##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestMergeIntoTableWithNonRecordKeyField.scala:
##########
@@ -133,7 +133,7 @@ class TestMergeIntoTableWithNonRecordKeyField extends 
HoodieSparkSqlTestBase wit
              |""".stripMargin)
 
         if (sparkSqlOptimizedWrites) {
-          val errorMessage2 = "Hudi tables with record key are required to 
match on all record key columns. Column: 'name' not found"
+          val errorMessage2 = "Hudi tables with primary key are required to 
match on all primary key columns. Column: 'name' not found"

Review Comment:
   nit: this wording change is unrelated to the PR — please revert it back to the original "record key" message.



##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionBucketIndexSupport.scala:
##########
@@ -172,30 +172,39 @@ class TestPartitionBucketIndexSupport extends 
TestBucketIndexSupport {
 
   def exprFilePathAnswerCheck(bucketIndexSupport: PartitionBucketIndexSupport, 
exprRaw: String, expectResult: Set[String],
                               allFileStatus: Set[String]): Unit = {
-    val resolveExpr = HoodieCatalystExpressionUtils.resolveExpr(spark, 
exprRaw, structSchema)
-    val optimizerPlan = 
spark.sessionState.optimizer.execute(DummyExpressionHolder(Seq(resolveExpr)))
-    val optimizerExpr = 
optimizerPlan.asInstanceOf[DummyExpressionHolder].exprs.head
-
-    // split input files into different partitions
-    val partitionPath1 = DEFAULT_PARTITION_PATH(0)
-    val allFileSlices1: Seq[FileSlice] = allFileStatus.slice(0, 
3).map(fileName => {
-      val slice = new FileSlice(partitionPath1, "00000000000000000", 
FSUtils.getFileId(fileName))
-      slice.setBaseFile(new HoodieBaseFile(new StoragePathInfo(new 
StoragePath(fileName), 0L, false, 0, 0, 0)))
-      slice
-    }).toSeq
-
-    val partitionPath2 = DEFAULT_PARTITION_PATH(1)
-    val allFileSlices2: Seq[FileSlice] = allFileStatus.slice(3, 
5).map(fileName => {
-      val slice = new FileSlice(partitionPath1, "00000000000000000", 
FSUtils.getFileId(fileName))
-      slice.setBaseFile(new HoodieBaseFile(new StoragePathInfo(new 
StoragePath(fileName), 0L, false, 0, 0, 0)))
-      slice
-    }).toSeq
-
-    val input = Seq((Option.apply(new 
BaseHoodieTableFileIndex.PartitionPath(partitionPath1, Array())), 
allFileSlices1),
-      (Option.apply(new BaseHoodieTableFileIndex.PartitionPath(partitionPath2, 
Array())), allFileSlices2))
-    val candidate = bucketIndexSupport.computeCandidateFileNames(fileIndex, 
splitConjunctivePredicates(optimizerExpr),
-      Seq(), input, false)
-
-    assert(candidate.get.equals(expectResult))
+    // On Spark 4 got org.apache.spark.SparkException:
+    // [PLAN_VALIDATION_FAILED_RULE_EXECUTOR] The input plan of 
org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2 is invalid: 
Aliases A#2143653L are dangling in the references for plan:
+    // DummyExpressionHolder [(A#2143653L = cast(3 as bigint))]
+    //
+    // Previous schema:
+    // Previous plan: DummyExpressionHolder [(A#2143653L = cast(3 as bigint))]
+    //  SQLSTATE: XXKD0
+    if (!HoodieSparkUtils.gteqSpark4_0) { // TODO fix later

Review Comment:
   Please file a JIRA ticket to track this Spark 4 issue (include the exception details there), reference it here as `TODO(HUDI-WXYZ)`, and remove the inline exception-trace comments from the code.



##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala:
##########
@@ -884,6 +884,9 @@ class TestCreateTable extends HoodieSparkSqlTestBase {
           .mode(SaveMode.Overwrite)
           .save(tablePath)
 
+        val errorMsg =

Review Comment:
   nit: this change is unnecessary for this PR — please revert it to keep the diff minimal.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to