This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch branch-0.x
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/branch-0.x by this push:
new 011c351d982f [HUDI-9666] Ensure no RLI partition pruning for complex keygen (#18271)
011c351d982f is described below
commit 011c351d982f8f8fdf4afddc0f9557ab243d0ded
Author: Lokesh Jain <[email protected]>
AuthorDate: Thu Mar 5 10:21:06 2026 +0530
[HUDI-9666] Ensure no RLI partition pruning for complex keygen (#18271)
---------
Co-authored-by: Lokesh Jain <[email protected]>
---
.../org/apache/hudi/RecordLevelIndexSupport.scala | 4 +++
.../functional/TestRecordLevelIndexWithSQL.scala | 29 ++++++++++++++++++++++
2 files changed, 33 insertions(+)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala
index 41e8db018618..bc35a0ed48d3 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/RecordLevelIndexSupport.scala
@@ -23,6 +23,7 @@ import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.keygen.KeyGenUtils
+import org.apache.hudi.keygen.constant.KeyGeneratorType
import org.apache.hudi.metadata.{HoodieTableMetadata, HoodieTableMetadataUtil}
import org.apache.hudi.storage.StoragePathInfo
import org.apache.hudi.util.JFunction
@@ -88,6 +89,9 @@ class RecordLevelIndexSupport(spark: SparkSession,
def filterQueriesWithRecordKey(queryFilters: Seq[Expression]): (List[Expression], List[String]) = {
if (!isIndexAvailable || KeyGenUtils.mayUseNewEncodingForComplexKeyGen(metaClient.getTableConfig)) {
(List.empty, List.empty)
+ } else if (KeyGeneratorType.isComplexKeyGenerator(metaClient.getTableConfig)) {
+ // Complex record keys filtering is not yet supported. Support was added in HUDI-8432.
+ (List.empty, List.empty)
} else {
var recordKeyQueries: List[Expression] = List.empty
var recordKeys: List[String] = List.empty
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala
index 97fdc1e10b21..d55ef5aaf2ae 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestRecordLevelIndexWithSQL.scala
@@ -17,6 +17,7 @@
package org.apache.hudi.functional
+import org.apache.hudi.DataSourceWriteOptions.RECORDKEY_FIELD
import org.apache.hudi.common.model.{FileSlice, HoodieTableType}
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.metadata.HoodieMetadataFileSystemView
@@ -188,4 +189,32 @@ class TestRecordLevelIndexWithSQL extends RecordLevelIndexTestBase {
assertEquals(2, spark.read.format("hudi").options(hudiOpts).load(dummyTablePath).filter("not_record_key_col in ('row1', 'abc')").count())
}
+
+ @Test
+ def testRLINoPruningWithComplexRecordKeys(): Unit = {
+ var hudiOpts = commonOpts + {
+ RECORDKEY_FIELD.key -> "_row_key,rider"
+ }
+ hudiOpts = hudiOpts + (
+ DataSourceWriteOptions.TABLE_TYPE.key -> "COPY_ON_WRITE",
+ DataSourceReadOptions.ENABLE_DATA_SKIPPING.key -> "true")
+
+ doWriteAndValidateDataAndRecordIndex(hudiOpts,
+ operation = DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL,
+ saveMode = SaveMode.Overwrite,
+ validate = false)
+ doWriteAndValidateDataAndRecordIndex(hudiOpts,
+ operation = DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL,
+ saveMode = SaveMode.Append,
+ validate = false)
+
+ val indexOpts = hudiOpts + {"path" -> basePath}
+ metaClient = HoodieTableMetaClient.reload(metaClient)
+ val fileIndex = HoodieFileIndex(spark, metaClient, None, indexOpts, includeLogFiles = true)
+ // random data filter
+ val filteredPartitionDirectories = fileIndex.listFiles(Seq(), Seq(EqualTo(attribute("_row_key"), Literal("abc"))))
+ val filteredFilesCount = filteredPartitionDirectories.flatMap(s => s.files).size
+ // Assert no pruning with complex record keys
+ assertEquals(getLatestDataFilesCount(indexOpts, includeLogFiles = false), filteredFilesCount)
+ }
}