This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new faba943d4927 [MINOR] Improve readability of isFullKey argument in 
index lookup (#13634)
faba943d4927 is described below

commit faba943d49278f24f2953e2ebaf8aa88a711ea0c
Author: Davis-Zhang-Onehouse 
<[email protected]>
AuthorDate: Tue Jul 29 13:09:16 2025 -0700

    [MINOR] Improve readability of isFullKey argument in index lookup (#13634)
    
    Co-authored-by: Y Ethan Guo <[email protected]>
---
 .../hudi/metadata/HoodieBackedTableMetadata.java   | 44 +++++++++++++---------
 ...estHoodieBackedTableMetadataBuildPredicate.java |  5 ++-
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git 
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
 
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
index 1eeff4c6ed22..4ed2895602b1 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
@@ -42,8 +42,8 @@ import 
org.apache.hudi.common.model.HoodieRecordGlobalLocation;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.log.InstantRange;
 import org.apache.hudi.common.table.read.FileGroupReaderSchemaHandler;
-import org.apache.hudi.common.table.read.buffer.FileGroupRecordBufferLoader;
 import org.apache.hudi.common.table.read.HoodieFileGroupReader;
+import org.apache.hudi.common.table.read.buffer.FileGroupRecordBufferLoader;
 import 
org.apache.hudi.common.table.read.buffer.ReusableFileGroupRecordBufferLoader;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
@@ -316,12 +316,13 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
    */
   private HoodieData<HoodieRecord<HoodieMetadataPayload>> 
doLookupIndexRecords(HoodieData<String> keys, String partitionName, 
List<FileSlice> fileSlices,
                                                                                
SerializableFunctionUnchecked<String, String> keyEncodingFn) {
+    boolean isSecondaryIndex = 
MetadataPartitionType.fromPartitionPath(partitionName).equals(MetadataPartitionType.SECONDARY_INDEX);
 
     final int numFileSlices = fileSlices.size();
     if (numFileSlices == 1) {
       List<String> keysList = keys.map(keyEncodingFn::apply).collectAsList();
       TreeSet<String> distinctSortedKeys = new TreeSet<>(keysList);
-      return lookupRecords(partitionName, new ArrayList<>(distinctSortedKeys), 
fileSlices.get(0));
+      return lookupRecords(partitionName, new ArrayList<>(distinctSortedKeys), 
fileSlices.get(0), !isSecondaryIndex);
     }
 
     // For SI v2, there are 2 cases require different implementation:
@@ -350,7 +351,7 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
             distinctSortedKeyIter.forEachRemaining(keysList::add);
           }
           FileSlice fileSlice = 
fileSlices.get(mappingFunction.apply(keysList.get(0), numFileSlices));
-          return lookupRecordsItr(partitionName, keysList, fileSlice);
+          return lookupRecordsItr(partitionName, keysList, fileSlice, 
!isSecondaryIndex);
         };
     List<Integer> keySpace = IntStream.range(0, 
numFileSlices).boxed().collect(Collectors.toList());
     HoodieData<HoodieRecord<HoodieMetadataPayload>> result =
@@ -481,7 +482,6 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
     List<FileSlice> fileSlices = 
partitionFileSliceMap.computeIfAbsent(partitionName,
         k -> 
HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, 
getMetadataFileSystemView(), partitionName));
     checkState(!fileSlices.isEmpty(), "No file slices found for partition: " + 
partitionName);
-
     return doLookupIndexRecords(keys, partitionName, fileSlices, 
keyEncodingFn);
   }
 
@@ -594,9 +594,10 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
 
   private HoodieData<HoodieRecord<HoodieMetadataPayload>> lookupRecords(String 
partitionName,
                                                                         
List<String> sortedKeys,
-                                                                        
FileSlice fileSlice) {
+                                                                        
FileSlice fileSlice,
+                                                                        
boolean isFullKey) {
     List<HoodieRecord<HoodieMetadataPayload>> res = new ArrayList<>();
-    try (ClosableIterator<HoodieRecord<HoodieMetadataPayload>> iterator = 
lookupRecordsItr(partitionName, sortedKeys, fileSlice)) {
+    try (ClosableIterator<HoodieRecord<HoodieMetadataPayload>> iterator = 
lookupRecordsItr(partitionName, sortedKeys, fileSlice, isFullKey)) {
       iterator.forEachRemaining(res::add);
     }
     return HoodieListData.lazy(res);
@@ -605,22 +606,24 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
   private ClosableIterator<Pair<String, HoodieRecord<HoodieMetadataPayload>>> 
lookupKeyRecordPairsItr(String partitionName,
                                                                                
                       List<String> sortedKeys,
                                                                                
                       FileSlice fileSlice) {
+    boolean isSecondaryIndex = 
MetadataPartitionType.fromPartitionPath(partitionName).equals(MetadataPartitionType.SECONDARY_INDEX);
     return lookupRecords(partitionName, sortedKeys, fileSlice, metadataRecord 
-> {
       HoodieMetadataPayload payload = new 
HoodieMetadataPayload(Option.of(metadataRecord));
       String rowKey = payload.key != null ? payload.key : 
metadataRecord.get(KEY_FIELD_NAME).toString();
       HoodieKey hoodieKey = new HoodieKey(rowKey, partitionName);
       return Pair.of(rowKey, new HoodieAvroRecord<>(hoodieKey, payload));
-    }, true);
+    }, !isSecondaryIndex);
   }
 
   private ClosableIterator<HoodieRecord<HoodieMetadataPayload>> 
lookupRecordsItr(String partitionName,
                                                                                
  List<String> keys,
-                                                                               
  FileSlice fileSlice) {
+                                                                               
  FileSlice fileSlice,
+                                                                               
  boolean isFullKey) {
     return lookupRecords(partitionName, keys, fileSlice,
         metadataRecord -> {
           HoodieMetadataPayload payload = new 
HoodieMetadataPayload(Option.of(metadataRecord));
           return new HoodieAvroRecord<>(new HoodieKey(payload.key, 
partitionName), payload);
-        }, true);
+        }, isFullKey);
   }
 
   /**
@@ -663,20 +666,25 @@ public class HoodieBackedTableMetadata extends 
BaseTableMetadata {
    * @return A predicate for filtering records
    */
   static Predicate buildPredicate(String partitionName, List<String> 
sortedKeys, boolean isFullKey) {
-    if (MetadataPartitionType.fromPartitionPath(partitionName)
-          .equals(MetadataPartitionType.SECONDARY_INDEX)) {
-      // For secondary index, always use prefix matching
-      return Predicates.startsWithAny(null,
-          sortedKeys.stream()
-          .map(escapedKey -> escapedKey + SECONDARY_INDEX_RECORD_KEY_SEPARATOR)
-          .map(Literal::from)
-          .collect(Collectors.toList()));
-    } else if (isFullKey) {
+    if (isFullKey) {
+      ValidationUtils.checkArgument(
+          
!MetadataPartitionType.fromPartitionPath(partitionName).equals(MetadataPartitionType.SECONDARY_INDEX),
+          "Secondary index should never use full-key lookup");
       // For non-secondary index with full key matching
       return Predicates.in(null, sortedKeys.stream()
           .map(Literal::from)
           .collect(Collectors.toList()));
     } else {
+      // For secondary index we need an extra step of processing the key to 
lookup before handing it over to filegroup reader.
+      if (MetadataPartitionType.fromPartitionPath(partitionName)
+          .equals(MetadataPartitionType.SECONDARY_INDEX)) {
+        // For secondary index, always use prefix matching
+        return Predicates.startsWithAny(null,
+            sortedKeys.stream()
+                .map(escapedKey -> escapedKey + 
SECONDARY_INDEX_RECORD_KEY_SEPARATOR)
+                .map(Literal::from)
+                .collect(Collectors.toList()));
+      }
       // For non-secondary index with prefix matching
       return Predicates.startsWithAny(null, sortedKeys.stream()
           .map(Literal::from)
diff --git 
a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataBuildPredicate.java
 
b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataBuildPredicate.java
index fd995f900d1c..27446108018e 100644
--- 
a/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataBuildPredicate.java
+++ 
b/hudi-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataBuildPredicate.java
@@ -27,6 +27,7 @@ import java.util.Arrays;
 import java.util.List;
 
 import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
@@ -41,9 +42,9 @@ public class TestHoodieBackedTableMetadataBuildPredicate {
     String partitionName = 
MetadataPartitionType.SECONDARY_INDEX.getPartitionPath();
     List<String> sortedKeys = Arrays.asList("key1", "key2", "key3");
     
-    Predicate predicateFullKey = 
HoodieBackedTableMetadata.buildPredicate(partitionName, sortedKeys, true);
+    Exception exception = assertThrows(IllegalArgumentException.class, () -> 
HoodieBackedTableMetadata.buildPredicate(partitionName, sortedKeys, true));
     // Verify it uses startsWithAny for secondary index
-    
assertTrue(predicateFullKey.getOperator().equals(Expression.Operator.STARTS_WITH));
+    assertTrue(exception.getMessage().contains("Secondary index should never 
use full-key lookup"));
     
     // Test case 2: Secondary index partition with isFullKey = false
     Predicate predicatePrefixKey = 
HoodieBackedTableMetadata.buildPredicate(partitionName, sortedKeys, false);

Reply via email to