This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch branch-0.x
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/branch-0.x by this push:
new 011c351d982f [HUDI-9666] Ensure no RLI partition pruning for complex keygen (#18271)
011c351d982f is described below
commit 011c351d982f8f8fdf4afddc0f9557ab243d0ded
Author: Lokesh Jain <[email protected]>
AuthorDate: Thu Mar 5 10:21:06 2026 +0530
[HUDI-9666] Ensure no RLI partition pruning for complex keygen (#18271)
---------
Co-authored-by: Lokesh Jain <[email protected]>
---
.../org/apache/hudi/RecordLevelIndexSupport.scala | 4 +++
.../functional/TestRecordLevelIndexWithSQL.scala | 29 ++++++++++++++++++++++
2 files changed, 33 insertions(+)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala
index 41e8db018618..bc35a0ed48d3 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala
@@ -23,6 +23,7 @@ import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.keygen.KeyGenUtils
+import org.apache.hudi.keygen.constant.KeyGeneratorType
import org.apache.hudi.metadata.{HoodieTableMetadata, HoodieTableMetadataUtil}
import org.apache.hudi.storage.StoragePathInfo
import org.apache.hudi.util.JFunction
@@ -88,6 +89,9 @@ class RecordLevelIndexSupport(spark: SparkSession,
def filterQueriesWithRecordKey(queryFilters: Seq[Expression]): (List[Expression], List[String]) = {
if (!isIndexAvailable || KeyGenUtils.mayUseNewEncodingForComplexKeyGen(metaClient.getTableConfig)) {
(List.empty, List.empty)
+ } else if (KeyGeneratorType.isComplexKeyGenerator(metaClient.getTableConfig)) {
+ // Complex record keys filtering is not yet supported. Support was added in HUDI-8432.
+ (List.empty, List.empty)
} else {
var recordKeyQueries: List[Expression] = List.empty
var recordKeys: List[String] = List.empty
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala
index 97fdc1e10b21..d55ef5aaf2ae 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala
@@ -17,6 +17,7 @@
package org.apache.hudi.functional
+import org.apache.hudi.DataSourceWriteOptions.RECORDKEY_FIELD
import org.apache.hudi.common.model.{FileSlice, HoodieTableType}
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.metadata.HoodieMetadataFileSystemView
@@ -188,4 +189,32 @@ class TestRecordLevelIndexWithSQL extends RecordLevelIndexTestBase {
assertEquals(2, spark.read.format("hudi").options(hudiOpts).load(dummyTablePath).filter("not_record_key_col in ('row1', 'abc')").count())
}
+
+ @Test
+ def testRLINoPruningWithComplexRecordKeys(): Unit = {
+ var hudiOpts = commonOpts + {
+ RECORDKEY_FIELD.key -> "_row_key,rider"
+ }
+ hudiOpts = hudiOpts + (
+ DataSourceWriteOptions.TABLE_TYPE.key -> "COPY_ON_WRITE",
+ DataSourceReadOptions.ENABLE_DATA_SKIPPING.key -> "true")
+
+ doWriteAndValidateDataAndRecordIndex(hudiOpts,
+ operation = DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL,
+ saveMode = SaveMode.Overwrite,
+ validate = false)
+ doWriteAndValidateDataAndRecordIndex(hudiOpts,
+ operation = DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL,
+ saveMode = SaveMode.Append,
+ validate = false)
+
+ val indexOpts = hudiOpts + {"path" -> basePath}
+ metaClient = HoodieTableMetaClient.reload(metaClient)
+ val fileIndex = HoodieFileIndex(spark, metaClient, None, indexOpts, includeLogFiles = true)
+ // random data filter
+ val filteredPartitionDirectories = fileIndex.listFiles(Seq(), Seq(EqualTo(attribute("_row_key"), Literal("abc"))))
+ val filteredFilesCount = filteredPartitionDirectories.flatMap(s => s.files).size
+ // Assert no pruning with complex record keys
+ assertEquals(getLatestDataFilesCount(indexOpts, includeLogFiles = false), filteredFilesCount)
+ }
}