This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch release-0.12.2-shadow
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 20956a21ac57366acdc75c81b0381a0e4ab10848
Author: LiChuang <[email protected]>
AuthorDate: Tue Dec 13 12:37:38 2022 +0800

    【HUDI-4917】Optimized the way to get HoodieBaseFile of loadColumnRangesFromFiles of Bloom Index (#6793)
---
 .../java/org/apache/hudi/index/bloom/HoodieBloomIndex.java  | 13 +++++++------
 .../main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java |  8 ++++++++
 .../src/main/java/org/apache/hudi/io/HoodieReadHandle.java  |  5 +++++
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
index 1417e40a9f5..57d9def9b42 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
@@ -26,6 +26,7 @@ import org.apache.hudi.common.data.HoodieData;
 import org.apache.hudi.common.data.HoodiePairData;
 import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodieBaseFile;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordLocation;
@@ -161,19 +162,19 @@ public class HoodieBloomIndex extends HoodieIndex<Object, 
Object> {
   List<Pair<String, BloomIndexFileInfo>> loadColumnRangesFromFiles(
       List<String> partitions, final HoodieEngineContext context, final 
HoodieTable hoodieTable) {
     // Obtain the latest data files from all the partitions.
-    List<Pair<String, String>> partitionPathFileIDList = 
getLatestBaseFilesForAllPartitions(partitions, context, hoodieTable).stream()
-        .map(pair -> Pair.of(pair.getKey(), pair.getValue().getFileId()))
+    List<Pair<String, Pair<String, HoodieBaseFile>>> partitionPathFileIDList = 
getLatestBaseFilesForAllPartitions(partitions, context, hoodieTable).stream()
+        .map(pair -> Pair.of(pair.getKey(), 
Pair.of(pair.getValue().getFileId(), pair.getValue())))
         .collect(toList());
 
     context.setJobStatus(this.getClass().getName(), "Obtain key ranges for 
file slices (range pruning=on): " + config.getTableName());
     return context.map(partitionPathFileIDList, pf -> {
       try {
-        HoodieRangeInfoHandle rangeInfoHandle = new 
HoodieRangeInfoHandle(config, hoodieTable, pf);
-        String[] minMaxKeys = rangeInfoHandle.getMinMaxKeys();
-        return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue(), 
minMaxKeys[0], minMaxKeys[1]));
+        HoodieRangeInfoHandle rangeInfoHandle = new 
HoodieRangeInfoHandle(config, hoodieTable, Pair.of(pf.getKey(), 
pf.getValue().getKey()));
+        String[] minMaxKeys = 
rangeInfoHandle.getMinMaxKeys(pf.getValue().getValue());
+        return Pair.of(pf.getKey(), new 
BloomIndexFileInfo(pf.getValue().getKey(), minMaxKeys[0], minMaxKeys[1]));
       } catch (MetadataNotFoundException me) {
         LOG.warn("Unable to find range metadata in file :" + pf);
-        return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue()));
+        return Pair.of(pf.getKey(), new 
BloomIndexFileInfo(pf.getValue().getKey()));
       }
     }, Math.max(partitionPathFileIDList.size(), 1));
   }
diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
index abe4a9befef..44407335081 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java
@@ -18,6 +18,7 @@
 
 package org.apache.hudi.io;
 
+import org.apache.hudi.common.model.HoodieBaseFile;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.config.HoodieWriteConfig;
@@ -41,4 +42,11 @@ public class HoodieRangeInfoHandle<T extends 
HoodieRecordPayload, I, K, O> exten
       return reader.readMinMaxRecordKeys();
     }
   }
+
+  public String[] getMinMaxKeys(HoodieBaseFile baseFile) throws IOException {
+    try (HoodieFileReader reader = createNewFileReader(baseFile)) {
+      return reader.readMinMaxRecordKeys();
+    }
+  }
+
 }
diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
index fee75b22dec..223241bc373 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java
@@ -66,4 +66,9 @@ public abstract class HoodieReadHandle<T extends 
HoodieRecordPayload, I, K, O> e
     return HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(),
         new Path(getLatestDataFile().getPath()));
   }
+
+  protected HoodieFileReader createNewFileReader(HoodieBaseFile 
hoodieBaseFile) throws IOException {
+    return HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(),
+            new Path(hoodieBaseFile.getPath()));
+  }
 }

Reply via email to